Diffstat (limited to 'drivers')
139 files changed, 5624 insertions, 2282 deletions
diff --git a/drivers/amba/tegra-ahb.c b/drivers/amba/tegra-ahb.c index bd5de08ad6fd..0576a7dd32a5 100644 --- a/drivers/amba/tegra-ahb.c +++ b/drivers/amba/tegra-ahb.c @@ -157,6 +157,7 @@ int tegra_ahb_enable_smmu(struct device_node *dn) EXPORT_SYMBOL(tegra_ahb_enable_smmu); #endif +#ifdef CONFIG_PM_SLEEP static int tegra_ahb_suspend(struct device *dev) { int i; @@ -176,6 +177,7 @@ static int tegra_ahb_resume(struct device *dev) gizmo_writel(ahb, ahb->ctx[i], tegra_ahb_gizmo[i]); return 0; } +#endif static UNIVERSAL_DEV_PM_OPS(tegra_ahb_pm, tegra_ahb_suspend, diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c index c909b7b7d5f1..d70abe77f737 100644 --- a/drivers/atm/solos-pci.c +++ b/drivers/atm/solos-pci.c @@ -42,7 +42,8 @@ #include <linux/swab.h> #include <linux/slab.h> -#define VERSION "0.07" +#define VERSION "1.04" +#define DRIVER_VERSION 0x01 #define PTAG "solos-pci" #define CONFIG_RAM_SIZE 128 @@ -56,16 +57,21 @@ #define FLASH_BUSY 0x60 #define FPGA_MODE 0x5C #define FLASH_MODE 0x58 +#define GPIO_STATUS 0x54 +#define DRIVER_VER 0x50 #define TX_DMA_ADDR(port) (0x40 + (4 * (port))) #define RX_DMA_ADDR(port) (0x30 + (4 * (port))) #define DATA_RAM_SIZE 32768 #define BUF_SIZE 2048 #define OLD_BUF_SIZE 4096 /* For FPGA versions <= 2*/ -#define FPGA_PAGE 528 /* FPGA flash page size*/ -#define SOLOS_PAGE 512 /* Solos flash page size*/ -#define FPGA_BLOCK (FPGA_PAGE * 8) /* FPGA flash block size*/ -#define SOLOS_BLOCK (SOLOS_PAGE * 8) /* Solos flash block size*/ +/* Old boards use ATMEL AD45DB161D flash */ +#define ATMEL_FPGA_PAGE 528 /* FPGA flash page size*/ +#define ATMEL_SOLOS_PAGE 512 /* Solos flash page size*/ +#define ATMEL_FPGA_BLOCK (ATMEL_FPGA_PAGE * 8) /* FPGA block size*/ +#define ATMEL_SOLOS_BLOCK (ATMEL_SOLOS_PAGE * 8) /* Solos block size*/ +/* Current boards use M25P/M25PE SPI flash */ +#define SPI_FLASH_BLOCK (256 * 64) #define RX_BUF(card, nr) ((card->buffers) + (nr)*(card->buffer_size)*2) #define TX_BUF(card, nr) ((card->buffers) + (nr)*(card->buffer_size)*2 + (card->buffer_size)) @@ -122,11 +128,14 @@ struct solos_card { struct sk_buff_head cli_queue[4]; struct sk_buff *tx_skb[4]; struct sk_buff *rx_skb[4]; + unsigned char *dma_bounce; wait_queue_head_t param_wq; wait_queue_head_t fw_wq; int using_dma; + int dma_alignment; int fpga_version; int buffer_size; + int atmel_flash; }; @@ -451,7 +460,6 @@ static ssize_t console_show(struct device *dev, struct device_attribute *attr, len = skb->len; memcpy(buf, skb->data, len); - dev_dbg(&card->dev->dev, "len: %d\n", len); kfree_skb(skb); return len; @@ -498,6 +506,78 @@ static ssize_t console_store(struct device *dev, struct device_attribute *attr, return err?:count; } +struct geos_gpio_attr { + struct device_attribute attr; + int offset; +}; + +#define SOLOS_GPIO_ATTR(_name, _mode, _show, _store, _offset) \ + struct geos_gpio_attr gpio_attr_##_name = { \ + .attr = __ATTR(_name, _mode, _show, _store), \ + .offset = _offset } + +static ssize_t geos_gpio_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct pci_dev *pdev = container_of(dev, struct pci_dev, dev); + struct geos_gpio_attr *gattr = container_of(attr, struct geos_gpio_attr, attr); + struct solos_card *card = pci_get_drvdata(pdev); + uint32_t data32; + + if (count != 1 && (count != 2 || buf[1] != '\n')) + return -EINVAL; + + spin_lock_irq(&card->param_queue_lock); + data32 = ioread32(card->config_regs + GPIO_STATUS); + if (buf[0] == '1') { + data32 |= 1 << gattr->offset; + iowrite32(data32, card->config_regs + 
GPIO_STATUS); + } else if (buf[0] == '0') { + data32 &= ~(1 << gattr->offset); + iowrite32(data32, card->config_regs + GPIO_STATUS); + } else { + count = -EINVAL; + } + spin_unlock_irq(&card->param_queue_lock); + return count; +} + +static ssize_t geos_gpio_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct pci_dev *pdev = container_of(dev, struct pci_dev, dev); + struct geos_gpio_attr *gattr = container_of(attr, struct geos_gpio_attr, attr); + struct solos_card *card = pci_get_drvdata(pdev); + uint32_t data32; + + data32 = ioread32(card->config_regs + GPIO_STATUS); + data32 = (data32 >> gattr->offset) & 1; + + return sprintf(buf, "%d\n", data32); +} + +static ssize_t hardware_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct pci_dev *pdev = container_of(dev, struct pci_dev, dev); + struct geos_gpio_attr *gattr = container_of(attr, struct geos_gpio_attr, attr); + struct solos_card *card = pci_get_drvdata(pdev); + uint32_t data32; + + data32 = ioread32(card->config_regs + GPIO_STATUS); + switch (gattr->offset) { + case 0: + /* HardwareVersion */ + data32 = data32 & 0x1F; + break; + case 1: + /* HardwareVariant */ + data32 = (data32 >> 5) & 0x0F; + break; + } + return sprintf(buf, "%d\n", data32); +} + static DEVICE_ATTR(console, 0644, console_show, console_store); @@ -506,6 +586,14 @@ static DEVICE_ATTR(console, 0644, console_show, console_store); #include "solos-attrlist.c" +static SOLOS_GPIO_ATTR(GPIO1, 0644, geos_gpio_show, geos_gpio_store, 9); +static SOLOS_GPIO_ATTR(GPIO2, 0644, geos_gpio_show, geos_gpio_store, 10); +static SOLOS_GPIO_ATTR(GPIO3, 0644, geos_gpio_show, geos_gpio_store, 11); +static SOLOS_GPIO_ATTR(GPIO4, 0644, geos_gpio_show, geos_gpio_store, 12); +static SOLOS_GPIO_ATTR(GPIO5, 0644, geos_gpio_show, geos_gpio_store, 13); +static SOLOS_GPIO_ATTR(PushButton, 0444, geos_gpio_show, NULL, 14); +static SOLOS_GPIO_ATTR(HardwareVersion, 0444, hardware_show, NULL, 0); +static SOLOS_GPIO_ATTR(HardwareVariant, 0444, hardware_show, NULL, 1); #undef SOLOS_ATTR_RO #undef SOLOS_ATTR_RW @@ -522,6 +610,23 @@ static struct attribute_group solos_attr_group = { .name = "parameters", }; +static struct attribute *gpio_attrs[] = { + &gpio_attr_GPIO1.attr.attr, + &gpio_attr_GPIO2.attr.attr, + &gpio_attr_GPIO3.attr.attr, + &gpio_attr_GPIO4.attr.attr, + &gpio_attr_GPIO5.attr.attr, + &gpio_attr_PushButton.attr.attr, + &gpio_attr_HardwareVersion.attr.attr, + &gpio_attr_HardwareVariant.attr.attr, + NULL +}; + +static struct attribute_group gpio_attr_group = { + .attrs = gpio_attrs, + .name = "gpio", +}; + static int flash_upgrade(struct solos_card *card, int chip) { const struct firmware *fw; @@ -533,16 +638,25 @@ static int flash_upgrade(struct solos_card *card, int chip) switch (chip) { case 0: fw_name = "solos-FPGA.bin"; - blocksize = FPGA_BLOCK; + if (card->atmel_flash) + blocksize = ATMEL_FPGA_BLOCK; + else + blocksize = SPI_FLASH_BLOCK; break; case 1: fw_name = "solos-Firmware.bin"; - blocksize = SOLOS_BLOCK; + if (card->atmel_flash) + blocksize = ATMEL_SOLOS_BLOCK; + else + blocksize = SPI_FLASH_BLOCK; break; case 2: if (card->fpga_version > LEGACY_BUFFERS){ fw_name = "solos-db-FPGA.bin"; - blocksize = FPGA_BLOCK; + if (card->atmel_flash) + blocksize = ATMEL_FPGA_BLOCK; + else + blocksize = SPI_FLASH_BLOCK; } else { dev_info(&card->dev->dev, "FPGA version doesn't support" " daughter board upgrades\n"); @@ -552,7 +666,10 @@ static int flash_upgrade(struct solos_card *card, int chip) case 3: if (card->fpga_version > LEGACY_BUFFERS){
fw_name = "solos-Firmware.bin"; - blocksize = SOLOS_BLOCK; + if (card->atmel_flash) + blocksize = ATMEL_SOLOS_BLOCK; + else + blocksize = SPI_FLASH_BLOCK; } else { dev_info(&card->dev->dev, "FPGA version doesn't support" " daughter board upgrades\n"); @@ -568,6 +685,9 @@ static int flash_upgrade(struct solos_card *card, int chip) dev_info(&card->dev->dev, "Flash upgrade starting\n"); + /* New FPGAs require driver version before permitting flash upgrades */ + iowrite32(DRIVER_VERSION, card->config_regs + DRIVER_VER); + numblocks = fw->size / blocksize; dev_info(&card->dev->dev, "Firmware size: %zd\n", fw->size); dev_info(&card->dev->dev, "Number of blocks: %d\n", numblocks); @@ -597,9 +717,13 @@ static int flash_upgrade(struct solos_card *card, int chip) /* dev_info(&card->dev->dev, "Set FPGA Flash mode to Block Write\n"); */ iowrite32(((chip * 2) + 1), card->config_regs + FLASH_MODE); - /* Copy block to buffer, swapping each 16 bits */ + /* Copy block to buffer, swapping each 16 bits for Atmel flash */ for(i = 0; i < blocksize; i += 4) { - uint32_t word = swahb32p((uint32_t *)(fw->data + offset + i)); + uint32_t word; + if (card->atmel_flash) + word = swahb32p((uint32_t *)(fw->data + offset + i)); + else + word = *(uint32_t *)(fw->data + offset + i); if(card->fpga_version > LEGACY_BUFFERS) iowrite32(word, FLASH_BUF + i); else @@ -961,7 +1085,12 @@ static uint32_t fpga_tx(struct solos_card *card) tx_started |= 1 << port; oldskb = skb; /* We're done with this skb already */ } else if (skb && card->using_dma) { - SKB_CB(skb)->dma_addr = pci_map_single(card->dev, skb->data, + unsigned char *data = skb->data; + if ((unsigned long)data & card->dma_alignment) { + data = card->dma_bounce + (BUF_SIZE * port); + memcpy(data, skb->data, skb->len); + } + SKB_CB(skb)->dma_addr = pci_map_single(card->dev, data, skb->len, PCI_DMA_TODEVICE); card->tx_skb[port] = skb; iowrite32(SKB_CB(skb)->dma_addr, @@ -1133,18 +1262,33 @@ static int fpga_probe(struct pci_dev *dev, const struct pci_device_id *id) db_fpga_upgrade = db_firmware_upgrade = 0; } + /* Stopped using Atmel flash after 0.03-38 */ + if (fpga_ver < 39) + card->atmel_flash = 1; + else + card->atmel_flash = 0; + + data32 = ioread32(card->config_regs + PORTS); + card->nr_ports = (data32 & 0x000000FF); + if (card->fpga_version >= DMA_SUPPORTED) { pci_set_master(dev); card->using_dma = 1; + if (1) { /* All known FPGA versions so far */ + card->dma_alignment = 3; + card->dma_bounce = kmalloc(card->nr_ports * BUF_SIZE, GFP_KERNEL); + if (!card->dma_bounce) { + dev_warn(&card->dev->dev, "Failed to allocate DMA bounce buffers\n"); + /* Fallback to MMIO doesn't work */ + goto out_unmap_both; + } + } } else { card->using_dma = 0; /* Set RX empty flag for all ports */ iowrite32(0xF0, card->config_regs + FLAGS_ADDR); } - data32 = ioread32(card->config_regs + PORTS); - card->nr_ports = (data32 & 0x000000FF); - pci_set_drvdata(dev, card); tasklet_init(&card->tlet, solos_bh, (unsigned long)card); @@ -1179,6 +1323,10 @@ static int fpga_probe(struct pci_dev *dev, const struct pci_device_id *id) if (err) goto out_free_irq; + if (card->fpga_version >= DMA_SUPPORTED && + sysfs_create_group(&card->dev->dev.kobj, &gpio_attr_group)) + dev_err(&card->dev->dev, "Could not register parameter group for GPIOs\n"); + return 0; out_free_irq: @@ -1187,6 +1335,7 @@ static int fpga_probe(struct pci_dev *dev, const struct pci_device_id *id) tasklet_kill(&card->tlet); out_unmap_both: + kfree(card->dma_bounce); pci_set_drvdata(dev, NULL); pci_iounmap(dev, card->buffers); 
out_unmap_config: @@ -1289,11 +1438,16 @@ static void fpga_remove(struct pci_dev *dev) iowrite32(1, card->config_regs + FPGA_MODE); (void)ioread32(card->config_regs + FPGA_MODE); + if (card->fpga_version >= DMA_SUPPORTED) + sysfs_remove_group(&card->dev->dev.kobj, &gpio_attr_group); + atm_remove(card); free_irq(dev->irq, card); tasklet_kill(&card->tlet); + kfree(card->dma_bounce); + /* Release device from reset */ iowrite32(0, card->config_regs + FPGA_MODE); (void)ioread32(card->config_regs + FPGA_MODE); diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index 147d1a4dd269..17cf7cad601e 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -148,7 +148,7 @@ static int dev_mkdir(const char *name, umode_t mode) struct path path; int err; - dentry = kern_path_create(AT_FDCWD, name, &path, 1); + dentry = kern_path_create(AT_FDCWD, name, &path, LOOKUP_DIRECTORY); if (IS_ERR(dentry)) return PTR_ERR(dentry); diff --git a/drivers/base/dma-buf.c b/drivers/base/dma-buf.c index 460e22dee36d..a3f79c495a41 100644 --- a/drivers/base/dma-buf.c +++ b/drivers/base/dma-buf.c @@ -298,6 +298,8 @@ void dma_buf_unmap_attachment(struct dma_buf_attachment *attach, struct sg_table *sg_table, enum dma_data_direction direction) { + might_sleep(); + if (WARN_ON(!attach || !attach->dmabuf || !sg_table)) return; diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index bb3d9be3b1b4..89576a0b3f2e 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -61,15 +61,29 @@ #define RBD_MINORS_PER_MAJOR 256 /* max minors per blkdev */ -#define RBD_MAX_SNAP_NAME_LEN 32 +#define RBD_SNAP_DEV_NAME_PREFIX "snap_" +#define RBD_MAX_SNAP_NAME_LEN \ + (NAME_MAX - (sizeof (RBD_SNAP_DEV_NAME_PREFIX) - 1)) + #define RBD_MAX_SNAP_COUNT 510 /* allows max snapc to fit in 4KB */ #define RBD_MAX_OPT_LEN 1024 #define RBD_SNAP_HEAD_NAME "-" +/* This allows a single page to hold an image name sent by OSD */ +#define RBD_IMAGE_NAME_LEN_MAX (PAGE_SIZE - sizeof (__le32) - 1) #define RBD_IMAGE_ID_LEN_MAX 64 + #define RBD_OBJ_PREFIX_LEN_MAX 64 +/* Feature bits */ + +#define RBD_FEATURE_LAYERING 1 + +/* Features supported by this (client software) implementation. */ + +#define RBD_FEATURES_ALL (0) + /* * An RBD device name will be "rbd#", where the "rbd" comes from * RBD_DRV_NAME above, and # is a unique integer identifier. @@ -101,6 +115,27 @@ struct rbd_image_header { u64 obj_version; }; +/* + * An rbd image specification. + * + * The tuple (pool_id, image_id, snap_id) is sufficient to uniquely + * identify an image. + */ +struct rbd_spec { + u64 pool_id; + char *pool_name; + + char *image_id; + size_t image_id_len; + char *image_name; + size_t image_name_len; + + u64 snap_id; + char *snap_name; + + struct kref kref; +}; + struct rbd_options { bool read_only; }; @@ -155,11 +190,8 @@ struct rbd_snap { }; struct rbd_mapping { - char *snap_name; - u64 snap_id; u64 size; u64 features; - bool snap_exists; bool read_only; }; @@ -173,7 +205,6 @@ struct rbd_device { struct gendisk *disk; /* blkdev's gendisk and rq */ u32 image_format; /* Either 1 or 2 */ - struct rbd_options rbd_opts; struct rbd_client *rbd_client; char name[DEV_NAME_LEN]; /* blkdev name, e.g. 
rbd3 */ @@ -181,17 +212,17 @@ struct rbd_device { spinlock_t lock; /* queue lock */ struct rbd_image_header header; - char *image_id; - size_t image_id_len; - char *image_name; - size_t image_name_len; + bool exists; + struct rbd_spec *spec; + char *header_name; - char *pool_name; - int pool_id; struct ceph_osd_event *watch_event; struct ceph_osd_request *watch_request; + struct rbd_spec *parent_spec; + u64 parent_overlap; + /* protects updating the header */ struct rw_semaphore header_rwsem; @@ -204,6 +235,7 @@ struct rbd_device { /* sysfs related */ struct device dev; + unsigned long open_count; }; static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */ @@ -218,7 +250,7 @@ static int rbd_dev_snaps_update(struct rbd_device *rbd_dev); static int rbd_dev_snaps_register(struct rbd_device *rbd_dev); static void rbd_dev_release(struct device *dev); -static void __rbd_remove_snap_dev(struct rbd_snap *snap); +static void rbd_remove_snap_dev(struct rbd_snap *snap); static ssize_t rbd_add(struct bus_type *bus, const char *buf, size_t count); @@ -258,17 +290,8 @@ static struct device rbd_root_dev = { # define rbd_assert(expr) ((void) 0) #endif /* !RBD_DEBUG */ -static struct device *rbd_get_dev(struct rbd_device *rbd_dev) -{ - return get_device(&rbd_dev->dev); -} - -static void rbd_put_dev(struct rbd_device *rbd_dev) -{ - put_device(&rbd_dev->dev); -} - -static int rbd_refresh_header(struct rbd_device *rbd_dev, u64 *hver); +static int rbd_dev_refresh(struct rbd_device *rbd_dev, u64 *hver); +static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev, u64 *hver); static int rbd_open(struct block_device *bdev, fmode_t mode) { @@ -277,8 +300,11 @@ static int rbd_open(struct block_device *bdev, fmode_t mode) if ((mode & FMODE_WRITE) && rbd_dev->mapping.read_only) return -EROFS; - rbd_get_dev(rbd_dev); + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); + (void) get_device(&rbd_dev->dev); set_device_ro(bdev, rbd_dev->mapping.read_only); + rbd_dev->open_count++; + mutex_unlock(&ctl_mutex); return 0; } @@ -287,7 +313,11 @@ static int rbd_release(struct gendisk *disk, fmode_t mode) { struct rbd_device *rbd_dev = disk->private_data; - rbd_put_dev(rbd_dev); + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); + rbd_assert(rbd_dev->open_count > 0); + rbd_dev->open_count--; + put_device(&rbd_dev->dev); + mutex_unlock(&ctl_mutex); return 0; } @@ -388,7 +418,7 @@ enum { static match_table_t rbd_opts_tokens = { /* int args above */ /* string args above */ - {Opt_read_only, "mapping.read_only"}, + {Opt_read_only, "read_only"}, {Opt_read_only, "ro"}, /* Alternate spelling */ {Opt_read_write, "read_write"}, {Opt_read_write, "rw"}, /* Alternate spelling */ @@ -441,33 +471,17 @@ static int parse_rbd_opts_token(char *c, void *private) * Get a ceph client with specific addr and configuration, if one does * not exist create it. 
*/ -static int rbd_get_client(struct rbd_device *rbd_dev, const char *mon_addr, - size_t mon_addr_len, char *options) +static struct rbd_client *rbd_get_client(struct ceph_options *ceph_opts) { - struct rbd_options *rbd_opts = &rbd_dev->rbd_opts; - struct ceph_options *ceph_opts; struct rbd_client *rbdc; - rbd_opts->read_only = RBD_READ_ONLY_DEFAULT; - - ceph_opts = ceph_parse_options(options, mon_addr, - mon_addr + mon_addr_len, - parse_rbd_opts_token, rbd_opts); - if (IS_ERR(ceph_opts)) - return PTR_ERR(ceph_opts); - rbdc = rbd_client_find(ceph_opts); - if (rbdc) { - /* using an existing client */ + if (rbdc) /* using an existing client */ ceph_destroy_options(ceph_opts); - } else { + else rbdc = rbd_client_create(ceph_opts); - if (IS_ERR(rbdc)) - return PTR_ERR(rbdc); - } - rbd_dev->rbd_client = rbdc; - return 0; + return rbdc; } /* @@ -492,10 +506,10 @@ static void rbd_client_release(struct kref *kref) * Drop reference to ceph client node. If it's not referenced anymore, release * it. */ -static void rbd_put_client(struct rbd_device *rbd_dev) +static void rbd_put_client(struct rbd_client *rbdc) { - kref_put(&rbd_dev->rbd_client->kref, rbd_client_release); - rbd_dev->rbd_client = NULL; + if (rbdc) + kref_put(&rbdc->kref, rbd_client_release); } /* @@ -524,6 +538,16 @@ static bool rbd_dev_ondisk_valid(struct rbd_image_header_ondisk *ondisk) if (memcmp(&ondisk->text, RBD_HEADER_TEXT, sizeof (RBD_HEADER_TEXT))) return false; + /* The bio layer requires at least sector-sized I/O */ + + if (ondisk->options.order < SECTOR_SHIFT) + return false; + + /* If we use u64 in a few spots we may be able to loosen this */ + + if (ondisk->options.order > 8 * sizeof (int) - 1) + return false; + /* * The size of a snapshot header has to fit in a size_t, and * that limits the number of snapshots. 
@@ -635,6 +659,20 @@ out_err: return -ENOMEM; } +static const char *rbd_snap_name(struct rbd_device *rbd_dev, u64 snap_id) +{ + struct rbd_snap *snap; + + if (snap_id == CEPH_NOSNAP) + return RBD_SNAP_HEAD_NAME; + + list_for_each_entry(snap, &rbd_dev->snaps, node) + if (snap_id == snap->id) + return snap->name; + + return NULL; +} + static int snap_by_name(struct rbd_device *rbd_dev, const char *snap_name) { @@ -642,7 +680,7 @@ static int snap_by_name(struct rbd_device *rbd_dev, const char *snap_name) list_for_each_entry(snap, &rbd_dev->snaps, node) { if (!strcmp(snap_name, snap->name)) { - rbd_dev->mapping.snap_id = snap->id; + rbd_dev->spec->snap_id = snap->id; rbd_dev->mapping.size = snap->size; rbd_dev->mapping.features = snap->features; @@ -653,26 +691,23 @@ static int snap_by_name(struct rbd_device *rbd_dev, const char *snap_name) return -ENOENT; } -static int rbd_dev_set_mapping(struct rbd_device *rbd_dev, char *snap_name) +static int rbd_dev_set_mapping(struct rbd_device *rbd_dev) { int ret; - if (!memcmp(snap_name, RBD_SNAP_HEAD_NAME, + if (!memcmp(rbd_dev->spec->snap_name, RBD_SNAP_HEAD_NAME, sizeof (RBD_SNAP_HEAD_NAME))) { - rbd_dev->mapping.snap_id = CEPH_NOSNAP; + rbd_dev->spec->snap_id = CEPH_NOSNAP; rbd_dev->mapping.size = rbd_dev->header.image_size; rbd_dev->mapping.features = rbd_dev->header.features; - rbd_dev->mapping.snap_exists = false; - rbd_dev->mapping.read_only = rbd_dev->rbd_opts.read_only; ret = 0; } else { - ret = snap_by_name(rbd_dev, snap_name); + ret = snap_by_name(rbd_dev, rbd_dev->spec->snap_name); if (ret < 0) goto done; - rbd_dev->mapping.snap_exists = true; rbd_dev->mapping.read_only = true; } - rbd_dev->mapping.snap_name = snap_name; + rbd_dev->exists = true; done: return ret; } @@ -695,13 +730,13 @@ static char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset) u64 segment; int ret; - name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO); + name = kmalloc(MAX_OBJ_NAME_SIZE + 1, GFP_NOIO); if (!name) return NULL; segment = offset >> rbd_dev->header.obj_order; - ret = snprintf(name, RBD_MAX_SEG_NAME_LEN, "%s.%012llx", + ret = snprintf(name, MAX_OBJ_NAME_SIZE + 1, "%s.%012llx", rbd_dev->header.object_prefix, segment); - if (ret < 0 || ret >= RBD_MAX_SEG_NAME_LEN) { + if (ret < 0 || ret > MAX_OBJ_NAME_SIZE) { pr_err("error formatting segment name for #%llu (%d)\n", segment, ret); kfree(name); @@ -800,77 +835,144 @@ static void zero_bio_chain(struct bio *chain, int start_ofs) } /* - * bio_chain_clone - clone a chain of bios up to a certain length. - * might return a bio_pair that will need to be released. + * Clone a portion of a bio, starting at the given byte offset + * and continuing for the number of bytes indicated. 
*/ -static struct bio *bio_chain_clone(struct bio **old, struct bio **next, - struct bio_pair **bp, - int len, gfp_t gfpmask) -{ - struct bio *old_chain = *old; - struct bio *new_chain = NULL; - struct bio *tail; - int total = 0; - - if (*bp) { - bio_pair_release(*bp); - *bp = NULL; - } +static struct bio *bio_clone_range(struct bio *bio_src, + unsigned int offset, + unsigned int len, + gfp_t gfpmask) +{ + struct bio_vec *bv; + unsigned int resid; + unsigned short idx; + unsigned int voff; + unsigned short end_idx; + unsigned short vcnt; + struct bio *bio; - while (old_chain && (total < len)) { - struct bio *tmp; + /* Handle the easy case for the caller */ - tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs); - if (!tmp) - goto err_out; - gfpmask &= ~__GFP_WAIT; /* can't wait after the first */ + if (!offset && len == bio_src->bi_size) + return bio_clone(bio_src, gfpmask); - if (total + old_chain->bi_size > len) { - struct bio_pair *bp; + if (WARN_ON_ONCE(!len)) + return NULL; + if (WARN_ON_ONCE(len > bio_src->bi_size)) + return NULL; + if (WARN_ON_ONCE(offset > bio_src->bi_size - len)) + return NULL; - /* - * this split can only happen with a single paged bio, - * split_bio will BUG_ON if this is not the case - */ - dout("bio_chain_clone split! total=%d remaining=%d" - "bi_size=%u\n", - total, len - total, old_chain->bi_size); + /* Find first affected segment... */ - /* split the bio. We'll release it either in the next - call, or it will have to be released outside */ - bp = bio_split(old_chain, (len - total) / SECTOR_SIZE); - if (!bp) - goto err_out; + resid = offset; + __bio_for_each_segment(bv, bio_src, idx, 0) { + if (resid < bv->bv_len) + break; + resid -= bv->bv_len; + } + voff = resid; - __bio_clone(tmp, &bp->bio1); + /* ...and the last affected segment */ - *next = &bp->bio2; - } else { - __bio_clone(tmp, old_chain); - *next = old_chain->bi_next; - } + resid += len; + __bio_for_each_segment(bv, bio_src, end_idx, idx) { + if (resid <= bv->bv_len) + break; + resid -= bv->bv_len; + } + vcnt = end_idx - idx + 1; + + /* Build the clone */ - tmp->bi_bdev = NULL; - tmp->bi_next = NULL; - if (new_chain) - tail->bi_next = tmp; - else - new_chain = tmp; - tail = tmp; - old_chain = old_chain->bi_next; + bio = bio_alloc(gfpmask, (unsigned int) vcnt); + if (!bio) + return NULL; /* ENOMEM */ - total += tmp->bi_size; + bio->bi_bdev = bio_src->bi_bdev; + bio->bi_sector = bio_src->bi_sector + (offset >> SECTOR_SHIFT); + bio->bi_rw = bio_src->bi_rw; + bio->bi_flags |= 1 << BIO_CLONED; + + /* + * Copy over our part of the bio_vec, then update the first + * and last (or only) entries. + */ + memcpy(&bio->bi_io_vec[0], &bio_src->bi_io_vec[idx], + vcnt * sizeof (struct bio_vec)); + bio->bi_io_vec[0].bv_offset += voff; + if (vcnt > 1) { + bio->bi_io_vec[0].bv_len -= voff; + bio->bi_io_vec[vcnt - 1].bv_len = resid; + } else { + bio->bi_io_vec[0].bv_len = len; } - rbd_assert(total == len); + bio->bi_vcnt = vcnt; + bio->bi_size = len; + bio->bi_idx = 0; + + return bio; +} + +/* + * Clone a portion of a bio chain, starting at the given byte offset + * into the first bio in the source chain and continuing for the + * number of bytes indicated. The result is another bio chain of + * exactly the given length, or a null pointer on error. + * + * The bio_src and offset parameters are both in-out. On entry they + * refer to the first source bio and the offset into that bio where + * the start of data to be cloned is located. 
+ * + * On return, bio_src is updated to refer to the bio in the source + * chain that contains first un-cloned byte, and *offset will + * contain the offset of that byte within that bio. + */ +static struct bio *bio_chain_clone_range(struct bio **bio_src, + unsigned int *offset, + unsigned int len, + gfp_t gfpmask) +{ + struct bio *bi = *bio_src; + unsigned int off = *offset; + struct bio *chain = NULL; + struct bio **end; + + /* Build up a chain of clone bios up to the limit */ + + if (!bi || off >= bi->bi_size || !len) + return NULL; /* Nothing to clone */ - *old = old_chain; + end = &chain; + while (len) { + unsigned int bi_size; + struct bio *bio; + + if (!bi) + goto out_err; /* EINVAL; ran out of bio's */ + bi_size = min_t(unsigned int, bi->bi_size - off, len); + bio = bio_clone_range(bi, off, bi_size, gfpmask); + if (!bio) + goto out_err; /* ENOMEM */ + + *end = bio; + end = &bio->bi_next; + + off += bi_size; + if (off == bi->bi_size) { + bi = bi->bi_next; + off = 0; + } + len -= bi_size; + } + *bio_src = bi; + *offset = off; - return new_chain; + return chain; +out_err: + bio_chain_put(chain); -err_out: - dout("bio_chain_clone with err\n"); - bio_chain_put(new_chain); return NULL; } @@ -988,8 +1090,9 @@ static int rbd_do_request(struct request *rq, req_data->coll_index = coll_index; } - dout("rbd_do_request object_name=%s ofs=%llu len=%llu\n", object_name, - (unsigned long long) ofs, (unsigned long long) len); + dout("rbd_do_request object_name=%s ofs=%llu len=%llu coll=%p[%d]\n", + object_name, (unsigned long long) ofs, + (unsigned long long) len, coll, coll_index); osdc = &rbd_dev->rbd_client->client->osdc; req = ceph_osdc_alloc_request(osdc, flags, snapc, ops, @@ -1019,7 +1122,7 @@ static int rbd_do_request(struct request *rq, layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); layout->fl_stripe_count = cpu_to_le32(1); layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); - layout->fl_pg_pool = cpu_to_le32(rbd_dev->pool_id); + layout->fl_pg_pool = cpu_to_le32((int) rbd_dev->spec->pool_id); ret = ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno, req, ops); rbd_assert(ret == 0); @@ -1154,8 +1257,6 @@ done: static int rbd_do_op(struct request *rq, struct rbd_device *rbd_dev, struct ceph_snap_context *snapc, - u64 snapid, - int opcode, int flags, u64 ofs, u64 len, struct bio *bio, struct rbd_req_coll *coll, @@ -1167,6 +1268,9 @@ static int rbd_do_op(struct request *rq, int ret; struct ceph_osd_req_op *ops; u32 payload_len; + int opcode; + int flags; + u64 snapid; seg_name = rbd_segment_name(rbd_dev, ofs); if (!seg_name) @@ -1174,7 +1278,18 @@ static int rbd_do_op(struct request *rq, seg_len = rbd_segment_length(rbd_dev, ofs, len); seg_ofs = rbd_segment_offset(rbd_dev, ofs); - payload_len = (flags & CEPH_OSD_FLAG_WRITE ? 
seg_len : 0); + if (rq_data_dir(rq) == WRITE) { + opcode = CEPH_OSD_OP_WRITE; + flags = CEPH_OSD_FLAG_WRITE|CEPH_OSD_FLAG_ONDISK; + snapid = CEPH_NOSNAP; + payload_len = seg_len; + } else { + opcode = CEPH_OSD_OP_READ; + flags = CEPH_OSD_FLAG_READ; + snapc = NULL; + snapid = rbd_dev->spec->snap_id; + payload_len = 0; + } ret = -ENOMEM; ops = rbd_create_rw_ops(1, opcode, payload_len); @@ -1202,41 +1317,6 @@ done: } /* - * Request async osd write - */ -static int rbd_req_write(struct request *rq, - struct rbd_device *rbd_dev, - struct ceph_snap_context *snapc, - u64 ofs, u64 len, - struct bio *bio, - struct rbd_req_coll *coll, - int coll_index) -{ - return rbd_do_op(rq, rbd_dev, snapc, CEPH_NOSNAP, - CEPH_OSD_OP_WRITE, - CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, - ofs, len, bio, coll, coll_index); -} - -/* - * Request async osd read - */ -static int rbd_req_read(struct request *rq, - struct rbd_device *rbd_dev, - u64 snapid, - u64 ofs, u64 len, - struct bio *bio, - struct rbd_req_coll *coll, - int coll_index) -{ - return rbd_do_op(rq, rbd_dev, NULL, - snapid, - CEPH_OSD_OP_READ, - CEPH_OSD_FLAG_READ, - ofs, len, bio, coll, coll_index); -} - -/* * Request sync osd read */ static int rbd_req_sync_read(struct rbd_device *rbd_dev, @@ -1304,7 +1384,7 @@ static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) dout("rbd_watch_cb %s notify_id=%llu opcode=%u\n", rbd_dev->header_name, (unsigned long long) notify_id, (unsigned int) opcode); - rc = rbd_refresh_header(rbd_dev, &hver); + rc = rbd_dev_refresh(rbd_dev, &hver); if (rc) pr_warning(RBD_DRV_NAME "%d got notification but failed to " " update snaps: %d\n", rbd_dev->major, rc); @@ -1460,18 +1540,16 @@ static void rbd_rq_fn(struct request_queue *q) { struct rbd_device *rbd_dev = q->queuedata; struct request *rq; - struct bio_pair *bp = NULL; while ((rq = blk_fetch_request(q))) { struct bio *bio; - struct bio *rq_bio, *next_bio = NULL; bool do_write; unsigned int size; - u64 op_size = 0; u64 ofs; int num_segs, cur_seg = 0; struct rbd_req_coll *coll; struct ceph_snap_context *snapc; + unsigned int bio_offset; dout("fetched request\n"); @@ -1483,10 +1561,6 @@ static void rbd_rq_fn(struct request_queue *q) /* deduce our operation (read, write) */ do_write = (rq_data_dir(rq) == WRITE); - - size = blk_rq_bytes(rq); - ofs = blk_rq_pos(rq) * SECTOR_SIZE; - rq_bio = rq->bio; if (do_write && rbd_dev->mapping.read_only) { __blk_end_request_all(rq, -EROFS); continue; @@ -1496,8 +1570,8 @@ static void rbd_rq_fn(struct request_queue *q) down_read(&rbd_dev->header_rwsem); - if (rbd_dev->mapping.snap_id != CEPH_NOSNAP && - !rbd_dev->mapping.snap_exists) { + if (!rbd_dev->exists) { + rbd_assert(rbd_dev->spec->snap_id != CEPH_NOSNAP); up_read(&rbd_dev->header_rwsem); dout("request for non-existent snapshot"); spin_lock_irq(q->queue_lock); @@ -1509,6 +1583,10 @@ static void rbd_rq_fn(struct request_queue *q) up_read(&rbd_dev->header_rwsem); + size = blk_rq_bytes(rq); + ofs = blk_rq_pos(rq) * SECTOR_SIZE; + bio = rq->bio; + dout("%s 0x%x bytes at 0x%llx\n", do_write ? 
"write" : "read", size, (unsigned long long) blk_rq_pos(rq) * SECTOR_SIZE); @@ -1528,45 +1606,37 @@ static void rbd_rq_fn(struct request_queue *q) continue; } + bio_offset = 0; do { - /* a bio clone to be passed down to OSD req */ + u64 limit = rbd_segment_length(rbd_dev, ofs, size); + unsigned int chain_size; + struct bio *bio_chain; + + BUG_ON(limit > (u64) UINT_MAX); + chain_size = (unsigned int) limit; dout("rq->bio->bi_vcnt=%hu\n", rq->bio->bi_vcnt); - op_size = rbd_segment_length(rbd_dev, ofs, size); + kref_get(&coll->kref); - bio = bio_chain_clone(&rq_bio, &next_bio, &bp, - op_size, GFP_ATOMIC); - if (!bio) { - rbd_coll_end_req_index(rq, coll, cur_seg, - -ENOMEM, op_size); - goto next_seg; - } + /* Pass a cloned bio chain via an osd request */ - /* init OSD command: write or read */ - if (do_write) - rbd_req_write(rq, rbd_dev, - snapc, - ofs, - op_size, bio, - coll, cur_seg); + bio_chain = bio_chain_clone_range(&bio, + &bio_offset, chain_size, + GFP_ATOMIC); + if (bio_chain) + (void) rbd_do_op(rq, rbd_dev, snapc, + ofs, chain_size, + bio_chain, coll, cur_seg); else - rbd_req_read(rq, rbd_dev, - rbd_dev->mapping.snap_id, - ofs, - op_size, bio, - coll, cur_seg); - -next_seg: - size -= op_size; - ofs += op_size; + rbd_coll_end_req_index(rq, coll, cur_seg, + -ENOMEM, chain_size); + size -= chain_size; + ofs += chain_size; cur_seg++; - rq_bio = next_bio; } while (size > 0); kref_put(&coll->kref, rbd_coll_release); - if (bp) - bio_pair_release(bp); spin_lock_irq(q->queue_lock); ceph_put_snap_context(snapc); @@ -1576,28 +1646,47 @@ next_seg: /* * a queue callback. Makes sure that we don't create a bio that spans across * multiple osd objects. One exception would be with a single page bios, - * which we handle later at bio_chain_clone + * which we handle later at bio_chain_clone_range() */ static int rbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd, struct bio_vec *bvec) { struct rbd_device *rbd_dev = q->queuedata; - unsigned int chunk_sectors; - sector_t sector; - unsigned int bio_sectors; - int max; + sector_t sector_offset; + sector_t sectors_per_obj; + sector_t obj_sector_offset; + int ret; - chunk_sectors = 1 << (rbd_dev->header.obj_order - SECTOR_SHIFT); - sector = bmd->bi_sector + get_start_sect(bmd->bi_bdev); - bio_sectors = bmd->bi_size >> SECTOR_SHIFT; + /* + * Find how far into its rbd object the partition-relative + * bio start sector is to offset relative to the enclosing + * device. + */ + sector_offset = get_start_sect(bmd->bi_bdev) + bmd->bi_sector; + sectors_per_obj = 1 << (rbd_dev->header.obj_order - SECTOR_SHIFT); + obj_sector_offset = sector_offset & (sectors_per_obj - 1); + + /* + * Compute the number of bytes from that offset to the end + * of the object. Account for what's already used by the bio. + */ + ret = (int) (sectors_per_obj - obj_sector_offset) << SECTOR_SHIFT; + if (ret > bmd->bi_size) + ret -= bmd->bi_size; + else + ret = 0; - max = (chunk_sectors - ((sector & (chunk_sectors - 1)) - + bio_sectors)) << SECTOR_SHIFT; - if (max < 0) - max = 0; /* bio_add cannot handle a negative return */ - if (max <= bvec->bv_len && bio_sectors == 0) - return bvec->bv_len; - return max; + /* + * Don't send back more than was asked for. And if the bio + * was empty, let the whole thing through because: "Note + * that a block device *must* allow a single page to be + * added to an empty bio." 
+ */ + rbd_assert(bvec->bv_len <= PAGE_SIZE); + if (ret > (int) bvec->bv_len || !bmd->bi_size) + ret = (int) bvec->bv_len; + + return ret; } static void rbd_free_disk(struct rbd_device *rbd_dev) @@ -1663,13 +1752,13 @@ rbd_dev_v1_header_read(struct rbd_device *rbd_dev, u64 *version) ret = -ENXIO; pr_warning("short header read for image %s" " (want %zd got %d)\n", - rbd_dev->image_name, size, ret); + rbd_dev->spec->image_name, size, ret); goto out_err; } if (!rbd_dev_ondisk_valid(ondisk)) { ret = -ENXIO; pr_warning("invalid header for image %s\n", - rbd_dev->image_name); + rbd_dev->spec->image_name); goto out_err; } @@ -1707,19 +1796,32 @@ static int rbd_read_header(struct rbd_device *rbd_dev, return ret; } -static void __rbd_remove_all_snaps(struct rbd_device *rbd_dev) +static void rbd_remove_all_snaps(struct rbd_device *rbd_dev) { struct rbd_snap *snap; struct rbd_snap *next; list_for_each_entry_safe(snap, next, &rbd_dev->snaps, node) - __rbd_remove_snap_dev(snap); + rbd_remove_snap_dev(snap); +} + +static void rbd_update_mapping_size(struct rbd_device *rbd_dev) +{ + sector_t size; + + if (rbd_dev->spec->snap_id != CEPH_NOSNAP) + return; + + size = (sector_t) rbd_dev->header.image_size / SECTOR_SIZE; + dout("setting size to %llu sectors", (unsigned long long) size); + rbd_dev->mapping.size = (u64) size; + set_capacity(rbd_dev->disk, size); } /* * only read the first part of the ondisk header, without the snaps info */ -static int __rbd_refresh_header(struct rbd_device *rbd_dev, u64 *hver) +static int rbd_dev_v1_refresh(struct rbd_device *rbd_dev, u64 *hver) { int ret; struct rbd_image_header h; @@ -1730,17 +1832,9 @@ static int __rbd_refresh_header(struct rbd_device *rbd_dev, u64 *hver) down_write(&rbd_dev->header_rwsem); - /* resized? */ - if (rbd_dev->mapping.snap_id == CEPH_NOSNAP) { - sector_t size = (sector_t) h.image_size / SECTOR_SIZE; - - if (size != (sector_t) rbd_dev->mapping.size) { - dout("setting size to %llu sectors", - (unsigned long long) size); - rbd_dev->mapping.size = (u64) size; - set_capacity(rbd_dev->disk, size); - } - } + /* Update image size, and check for resize of mapped image */ + rbd_dev->header.image_size = h.image_size; + rbd_update_mapping_size(rbd_dev); /* rbd_dev->header.object_prefix shouldn't change */ kfree(rbd_dev->header.snap_sizes); @@ -1768,12 +1862,16 @@ static int __rbd_refresh_header(struct rbd_device *rbd_dev, u64 *hver) return ret; } -static int rbd_refresh_header(struct rbd_device *rbd_dev, u64 *hver) +static int rbd_dev_refresh(struct rbd_device *rbd_dev, u64 *hver) { int ret; + rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); - ret = __rbd_refresh_header(rbd_dev, hver); + if (rbd_dev->image_format == 1) + ret = rbd_dev_v1_refresh(rbd_dev, hver); + else + ret = rbd_dev_v2_refresh(rbd_dev, hver); mutex_unlock(&ctl_mutex); return ret; @@ -1885,7 +1983,7 @@ static ssize_t rbd_pool_show(struct device *dev, { struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); - return sprintf(buf, "%s\n", rbd_dev->pool_name); + return sprintf(buf, "%s\n", rbd_dev->spec->pool_name); } static ssize_t rbd_pool_id_show(struct device *dev, @@ -1893,7 +1991,8 @@ static ssize_t rbd_pool_id_show(struct device *dev, { struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); - return sprintf(buf, "%d\n", rbd_dev->pool_id); + return sprintf(buf, "%llu\n", + (unsigned long long) rbd_dev->spec->pool_id); } static ssize_t rbd_name_show(struct device *dev, @@ -1901,7 +2000,10 @@ static ssize_t rbd_name_show(struct device 
*dev, { struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); - return sprintf(buf, "%s\n", rbd_dev->image_name); + if (rbd_dev->spec->image_name) + return sprintf(buf, "%s\n", rbd_dev->spec->image_name); + + return sprintf(buf, "(unknown)\n"); } static ssize_t rbd_image_id_show(struct device *dev, @@ -1909,7 +2011,7 @@ static ssize_t rbd_image_id_show(struct device *dev, { struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); - return sprintf(buf, "%s\n", rbd_dev->image_id); + return sprintf(buf, "%s\n", rbd_dev->spec->image_id); } /* @@ -1922,7 +2024,50 @@ static ssize_t rbd_snap_show(struct device *dev, { struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); - return sprintf(buf, "%s\n", rbd_dev->mapping.snap_name); + return sprintf(buf, "%s\n", rbd_dev->spec->snap_name); +} + +/* + * For an rbd v2 image, shows the pool id, image id, and snapshot id + * for the parent image. If there is no parent, simply shows + * "(no parent image)". + */ +static ssize_t rbd_parent_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); + struct rbd_spec *spec = rbd_dev->parent_spec; + int count; + char *bufp = buf; + + if (!spec) + return sprintf(buf, "(no parent image)\n"); + + count = sprintf(bufp, "pool_id %llu\npool_name %s\n", + (unsigned long long) spec->pool_id, spec->pool_name); + if (count < 0) + return count; + bufp += count; + + count = sprintf(bufp, "image_id %s\nimage_name %s\n", spec->image_id, + spec->image_name ? spec->image_name : "(unknown)"); + if (count < 0) + return count; + bufp += count; + + count = sprintf(bufp, "snap_id %llu\nsnap_name %s\n", + (unsigned long long) spec->snap_id, spec->snap_name); + if (count < 0) + return count; + bufp += count; + + count = sprintf(bufp, "overlap %llu\n", rbd_dev->parent_overlap); + if (count < 0) + return count; + bufp += count; + + return (ssize_t) (bufp - buf); } static ssize_t rbd_image_refresh(struct device *dev, @@ -1933,7 +2078,7 @@ static ssize_t rbd_image_refresh(struct device *dev, struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); int ret; - ret = rbd_refresh_header(rbd_dev, NULL); + ret = rbd_dev_refresh(rbd_dev, NULL); return ret < 0 ? 
ret : size; } @@ -1948,6 +2093,7 @@ static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL); static DEVICE_ATTR(image_id, S_IRUGO, rbd_image_id_show, NULL); static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh); static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL); +static DEVICE_ATTR(parent, S_IRUGO, rbd_parent_show, NULL); static struct attribute *rbd_attrs[] = { &dev_attr_size.attr, @@ -1959,6 +2105,7 @@ static struct attribute *rbd_attrs[] = { &dev_attr_name.attr, &dev_attr_image_id.attr, &dev_attr_current_snap.attr, + &dev_attr_parent.attr, &dev_attr_refresh.attr, NULL }; @@ -2047,6 +2194,74 @@ static struct device_type rbd_snap_device_type = { .release = rbd_snap_dev_release, }; +static struct rbd_spec *rbd_spec_get(struct rbd_spec *spec) +{ + kref_get(&spec->kref); + + return spec; +} + +static void rbd_spec_free(struct kref *kref); +static void rbd_spec_put(struct rbd_spec *spec) +{ + if (spec) + kref_put(&spec->kref, rbd_spec_free); +} + +static struct rbd_spec *rbd_spec_alloc(void) +{ + struct rbd_spec *spec; + + spec = kzalloc(sizeof (*spec), GFP_KERNEL); + if (!spec) + return NULL; + kref_init(&spec->kref); + + rbd_spec_put(rbd_spec_get(spec)); /* TEMPORARY */ + + return spec; +} + +static void rbd_spec_free(struct kref *kref) +{ + struct rbd_spec *spec = container_of(kref, struct rbd_spec, kref); + + kfree(spec->pool_name); + kfree(spec->image_id); + kfree(spec->image_name); + kfree(spec->snap_name); + kfree(spec); +} + +struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, + struct rbd_spec *spec) +{ + struct rbd_device *rbd_dev; + + rbd_dev = kzalloc(sizeof (*rbd_dev), GFP_KERNEL); + if (!rbd_dev) + return NULL; + + spin_lock_init(&rbd_dev->lock); + INIT_LIST_HEAD(&rbd_dev->node); + INIT_LIST_HEAD(&rbd_dev->snaps); + init_rwsem(&rbd_dev->header_rwsem); + + rbd_dev->spec = spec; + rbd_dev->rbd_client = rbdc; + + return rbd_dev; +} + +static void rbd_dev_destroy(struct rbd_device *rbd_dev) +{ + rbd_spec_put(rbd_dev->parent_spec); + kfree(rbd_dev->header_name); + rbd_put_client(rbd_dev->rbd_client); + rbd_spec_put(rbd_dev->spec); + kfree(rbd_dev); +} + static bool rbd_snap_registered(struct rbd_snap *snap) { bool ret = snap->dev.type == &rbd_snap_device_type; @@ -2057,7 +2272,7 @@ static bool rbd_snap_registered(struct rbd_snap *snap) return ret; } -static void __rbd_remove_snap_dev(struct rbd_snap *snap) +static void rbd_remove_snap_dev(struct rbd_snap *snap) { list_del(&snap->node); if (device_is_registered(&snap->dev)) @@ -2073,7 +2288,7 @@ static int rbd_register_snap_dev(struct rbd_snap *snap, dev->type = &rbd_snap_device_type; dev->parent = parent; dev->release = rbd_snap_dev_release; - dev_set_name(dev, "snap_%s", snap->name); + dev_set_name(dev, "%s%s", RBD_SNAP_DEV_NAME_PREFIX, snap->name); dout("%s: registering device for snapshot %s\n", __func__, snap->name); ret = device_register(dev); @@ -2189,6 +2404,7 @@ static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev) dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); if (ret < 0) goto out; + ret = 0; /* rbd_req_sync_exec() can return positive */ p = reply_buf; rbd_dev->header.object_prefix = ceph_extract_encoded_string(&p, @@ -2216,6 +2432,7 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id, __le64 features; __le64 incompat; } features_buf = { 0 }; + u64 incompat; int ret; ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, @@ -2226,6 +2443,11 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id, dout("%s: rbd_req_sync_exec 
returned %d\n", __func__, ret); if (ret < 0) return ret; + + incompat = le64_to_cpu(features_buf.incompat); + if (incompat & ~RBD_FEATURES_ALL) + return -ENXIO; + *snap_features = le64_to_cpu(features_buf.features); dout(" snap_id 0x%016llx features = 0x%016llx incompat = 0x%016llx\n", @@ -2242,6 +2464,183 @@ static int rbd_dev_v2_features(struct rbd_device *rbd_dev) &rbd_dev->header.features); } +static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) +{ + struct rbd_spec *parent_spec; + size_t size; + void *reply_buf = NULL; + __le64 snapid; + void *p; + void *end; + char *image_id; + u64 overlap; + size_t len = 0; + int ret; + + parent_spec = rbd_spec_alloc(); + if (!parent_spec) + return -ENOMEM; + + size = sizeof (__le64) + /* pool_id */ + sizeof (__le32) + RBD_IMAGE_ID_LEN_MAX + /* image_id */ + sizeof (__le64) + /* snap_id */ + sizeof (__le64); /* overlap */ + reply_buf = kmalloc(size, GFP_KERNEL); + if (!reply_buf) { + ret = -ENOMEM; + goto out_err; + } + + snapid = cpu_to_le64(CEPH_NOSNAP); + ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, + "rbd", "get_parent", + (char *) &snapid, sizeof (snapid), + (char *) reply_buf, size, + CEPH_OSD_FLAG_READ, NULL); + dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); + if (ret < 0) + goto out_err; + + ret = -ERANGE; + p = reply_buf; + end = (char *) reply_buf + size; + ceph_decode_64_safe(&p, end, parent_spec->pool_id, out_err); + if (parent_spec->pool_id == CEPH_NOPOOL) + goto out; /* No parent? No problem. */ + + image_id = ceph_extract_encoded_string(&p, end, &len, GFP_KERNEL); + if (IS_ERR(image_id)) { + ret = PTR_ERR(image_id); + goto out_err; + } + parent_spec->image_id = image_id; + parent_spec->image_id_len = len; + ceph_decode_64_safe(&p, end, parent_spec->snap_id, out_err); + ceph_decode_64_safe(&p, end, overlap, out_err); + + rbd_dev->parent_overlap = overlap; + rbd_dev->parent_spec = parent_spec; + parent_spec = NULL; /* rbd_dev now owns this */ +out: + ret = 0; +out_err: + kfree(reply_buf); + rbd_spec_put(parent_spec); + + return ret; +} + +static char *rbd_dev_image_name(struct rbd_device *rbd_dev) +{ + size_t image_id_size; + char *image_id; + void *p; + void *end; + size_t size; + void *reply_buf = NULL; + size_t len = 0; + char *image_name = NULL; + int ret; + + rbd_assert(!rbd_dev->spec->image_name); + + image_id_size = sizeof (__le32) + rbd_dev->spec->image_id_len; + image_id = kmalloc(image_id_size, GFP_KERNEL); + if (!image_id) + return NULL; + + p = image_id; + end = (char *) image_id + image_id_size; + ceph_encode_string(&p, end, rbd_dev->spec->image_id, + (u32) rbd_dev->spec->image_id_len); + + size = sizeof (__le32) + RBD_IMAGE_NAME_LEN_MAX; + reply_buf = kmalloc(size, GFP_KERNEL); + if (!reply_buf) + goto out; + + ret = rbd_req_sync_exec(rbd_dev, RBD_DIRECTORY, + "rbd", "dir_get_name", + image_id, image_id_size, + (char *) reply_buf, size, + CEPH_OSD_FLAG_READ, NULL); + if (ret < 0) + goto out; + p = reply_buf; + end = (char *) reply_buf + size; + image_name = ceph_extract_encoded_string(&p, end, &len, GFP_KERNEL); + if (IS_ERR(image_name)) + image_name = NULL; + else + dout("%s: name is %s len is %zd\n", __func__, image_name, len); +out: + kfree(reply_buf); + kfree(image_id); + + return image_name; +} + +/* + * When a parent image gets probed, we only have the pool, image, + * and snapshot ids but not the names of any of them. This call + * is made later to fill in those names. 
It has to be done after + * rbd_dev_snaps_update() has completed because some of the + * information (in particular, snapshot name) is not available + * until then. + */ +static int rbd_dev_probe_update_spec(struct rbd_device *rbd_dev) +{ + struct ceph_osd_client *osdc; + const char *name; + void *reply_buf = NULL; + int ret; + + if (rbd_dev->spec->pool_name) + return 0; /* Already have the names */ + + /* Look up the pool name */ + + osdc = &rbd_dev->rbd_client->client->osdc; + name = ceph_pg_pool_name_by_id(osdc->osdmap, rbd_dev->spec->pool_id); + if (!name) + return -EIO; /* pool id too large (>= 2^31) */ + + rbd_dev->spec->pool_name = kstrdup(name, GFP_KERNEL); + if (!rbd_dev->spec->pool_name) + return -ENOMEM; + + /* Fetch the image name; tolerate failure here */ + + name = rbd_dev_image_name(rbd_dev); + if (name) { + rbd_dev->spec->image_name_len = strlen(name); + rbd_dev->spec->image_name = (char *) name; + } else { + pr_warning(RBD_DRV_NAME "%d " + "unable to get image name for image id %s\n", + rbd_dev->major, rbd_dev->spec->image_id); + } + + /* Look up the snapshot name. */ + + name = rbd_snap_name(rbd_dev, rbd_dev->spec->snap_id); + if (!name) { + ret = -EIO; + goto out_err; + } + rbd_dev->spec->snap_name = kstrdup(name, GFP_KERNEL); + if(!rbd_dev->spec->snap_name) + goto out_err; + + return 0; +out_err: + kfree(reply_buf); + kfree(rbd_dev->spec->pool_name); + rbd_dev->spec->pool_name = NULL; + + return ret; +} + static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev, u64 *ver) { size_t size; @@ -2328,7 +2727,6 @@ static char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, u32 which) int ret; void *p; void *end; - size_t snap_name_len; char *snap_name; size = sizeof (__le32) + RBD_MAX_SNAP_NAME_LEN; @@ -2348,9 +2746,7 @@ static char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, u32 which) p = reply_buf; end = (char *) reply_buf + size; - snap_name_len = 0; - snap_name = ceph_extract_encoded_string(&p, end, &snap_name_len, - GFP_KERNEL); + snap_name = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL); if (IS_ERR(snap_name)) { ret = PTR_ERR(snap_name); goto out; @@ -2397,6 +2793,41 @@ static char *rbd_dev_snap_info(struct rbd_device *rbd_dev, u32 which, return ERR_PTR(-EINVAL); } +static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev, u64 *hver) +{ + int ret; + __u8 obj_order; + + down_write(&rbd_dev->header_rwsem); + + /* Grab old order first, to see if it changes */ + + obj_order = rbd_dev->header.obj_order, + ret = rbd_dev_v2_image_size(rbd_dev); + if (ret) + goto out; + if (rbd_dev->header.obj_order != obj_order) { + ret = -EIO; + goto out; + } + rbd_update_mapping_size(rbd_dev); + + ret = rbd_dev_v2_snap_context(rbd_dev, hver); + dout("rbd_dev_v2_snap_context returned %d\n", ret); + if (ret) + goto out; + ret = rbd_dev_snaps_update(rbd_dev); + dout("rbd_dev_snaps_update returned %d\n", ret); + if (ret) + goto out; + ret = rbd_dev_snaps_register(rbd_dev); + dout("rbd_dev_snaps_register returned %d\n", ret); +out: + up_write(&rbd_dev->header_rwsem); + + return ret; +} + /* * Scan the rbd device's current snapshot list and compare it to the * newly-received snapshot context. 
Remove any existing snapshots @@ -2436,12 +2867,12 @@ static int rbd_dev_snaps_update(struct rbd_device *rbd_dev) /* Existing snapshot not in the new snap context */ - if (rbd_dev->mapping.snap_id == snap->id) - rbd_dev->mapping.snap_exists = false; - __rbd_remove_snap_dev(snap); + if (rbd_dev->spec->snap_id == snap->id) + rbd_dev->exists = false; + rbd_remove_snap_dev(snap); dout("%ssnap id %llu has been removed\n", - rbd_dev->mapping.snap_id == snap->id ? - "mapped " : "", + rbd_dev->spec->snap_id == snap->id ? + "mapped " : "", (unsigned long long) snap->id); /* Done with this list entry; advance */ @@ -2559,7 +2990,7 @@ static int rbd_init_watch_dev(struct rbd_device *rbd_dev) do { ret = rbd_req_sync_watch(rbd_dev); if (ret == -ERANGE) { - rc = rbd_refresh_header(rbd_dev, NULL); + rc = rbd_dev_refresh(rbd_dev, NULL); if (rc < 0) return rc; } @@ -2621,8 +3052,8 @@ static void rbd_dev_id_put(struct rbd_device *rbd_dev) struct rbd_device *rbd_dev; rbd_dev = list_entry(tmp, struct rbd_device, node); - if (rbd_id > max_id) - max_id = rbd_id; + if (rbd_dev->dev_id > max_id) + max_id = rbd_dev->dev_id; } spin_unlock(&rbd_dev_list_lock); @@ -2722,73 +3153,140 @@ static inline char *dup_token(const char **buf, size_t *lenp) } /* - * This fills in the pool_name, image_name, image_name_len, rbd_dev, - * rbd_md_name, and name fields of the given rbd_dev, based on the - * list of monitor addresses and other options provided via - * /sys/bus/rbd/add. Returns a pointer to a dynamically-allocated - * copy of the snapshot name to map if successful, or a - * pointer-coded error otherwise. + * Parse the options provided for an "rbd add" (i.e., rbd image + * mapping) request. These arrive via a write to /sys/bus/rbd/add, + * and the data written is passed here via a NUL-terminated buffer. + * Returns 0 if successful or an error code otherwise. + * + * The information extracted from these options is recorded in + * the other parameters which return dynamically-allocated + * structures: + * ceph_opts + * The address of a pointer that will refer to a ceph options + * structure. Caller must release the returned pointer using + * ceph_destroy_options() when it is no longer needed. + * rbd_opts + * Address of an rbd options pointer. Fully initialized by + * this function; caller must release with kfree(). + * spec + * Address of an rbd image specification pointer. Fully + * initialized by this function based on parsed options. + * Caller must release with rbd_spec_put(). * - * Note: rbd_dev is assumed to have been initially zero-filled. + * The options passed take this form: + * <mon_addrs> <options> <pool_name> <image_name> [<snap_id>] + * where: + * <mon_addrs> + * A comma-separated list of one or more monitor addresses. + * A monitor address is an ip address, optionally followed + * by a port number (separated by a colon). + * I.e.: ip1[:port1][,ip2[:port2]...] + * <options> + * A comma-separated list of ceph and/or rbd options. + * <pool_name> + * The name of the rados pool containing the rbd image. + * <image_name> + * The name of the image in that pool to map. + * <snap_id> + * An optional snapshot id. If provided, the mapping will + * present data from the image at the time that snapshot was + * created. The image head is used if no snapshot id is + * provided. Snapshot mappings are always read-only. 
*/ -static char *rbd_add_parse_args(struct rbd_device *rbd_dev, - const char *buf, - const char **mon_addrs, - size_t *mon_addrs_size, - char *options, - size_t options_size) +static int rbd_add_parse_args(const char *buf, + struct ceph_options **ceph_opts, + struct rbd_options **opts, + struct rbd_spec **rbd_spec) { size_t len; - char *err_ptr = ERR_PTR(-EINVAL); - char *snap_name; + char *options; + const char *mon_addrs; + size_t mon_addrs_size; + struct rbd_spec *spec = NULL; + struct rbd_options *rbd_opts = NULL; + struct ceph_options *copts; + int ret; /* The first four tokens are required */ len = next_token(&buf); if (!len) - return err_ptr; - *mon_addrs_size = len + 1; - *mon_addrs = buf; - + return -EINVAL; /* Missing monitor address(es) */ + mon_addrs = buf; + mon_addrs_size = len + 1; buf += len; - len = copy_token(&buf, options, options_size); - if (!len || len >= options_size) - return err_ptr; + ret = -EINVAL; + options = dup_token(&buf, NULL); + if (!options) + return -ENOMEM; + if (!*options) + goto out_err; /* Missing options */ - err_ptr = ERR_PTR(-ENOMEM); - rbd_dev->pool_name = dup_token(&buf, NULL); - if (!rbd_dev->pool_name) - goto out_err; + spec = rbd_spec_alloc(); + if (!spec) + goto out_mem; - rbd_dev->image_name = dup_token(&buf, &rbd_dev->image_name_len); - if (!rbd_dev->image_name) - goto out_err; + spec->pool_name = dup_token(&buf, NULL); + if (!spec->pool_name) + goto out_mem; + if (!*spec->pool_name) + goto out_err; /* Missing pool name */ - /* Snapshot name is optional */ + spec->image_name = dup_token(&buf, &spec->image_name_len); + if (!spec->image_name) + goto out_mem; + if (!*spec->image_name) + goto out_err; /* Missing image name */ + + /* + * Snapshot name is optional; default is to use "-" + * (indicating the head/no snapshot). + */ len = next_token(&buf); if (!len) { buf = RBD_SNAP_HEAD_NAME; /* No snapshot supplied */ len = sizeof (RBD_SNAP_HEAD_NAME) - 1; - } - snap_name = kmalloc(len + 1, GFP_KERNEL); - if (!snap_name) + } else if (len > RBD_MAX_SNAP_NAME_LEN) { + ret = -ENAMETOOLONG; goto out_err; - memcpy(snap_name, buf, len); - *(snap_name + len) = '\0'; + } + spec->snap_name = kmalloc(len + 1, GFP_KERNEL); + if (!spec->snap_name) + goto out_mem; + memcpy(spec->snap_name, buf, len); + *(spec->snap_name + len) = '\0'; -dout(" SNAP_NAME is <%s>, len is %zd\n", snap_name, len); + /* Initialize all rbd options to the defaults */ - return snap_name; + rbd_opts = kzalloc(sizeof (*rbd_opts), GFP_KERNEL); + if (!rbd_opts) + goto out_mem; + + rbd_opts->read_only = RBD_READ_ONLY_DEFAULT; + + copts = ceph_parse_options(options, mon_addrs, + mon_addrs + mon_addrs_size - 1, + parse_rbd_opts_token, rbd_opts); + if (IS_ERR(copts)) { + ret = PTR_ERR(copts); + goto out_err; + } + kfree(options); + *ceph_opts = copts; + *opts = rbd_opts; + *rbd_spec = spec; + + return 0; +out_mem: + ret = -ENOMEM; out_err: - kfree(rbd_dev->image_name); - rbd_dev->image_name = NULL; - rbd_dev->image_name_len = 0; - kfree(rbd_dev->pool_name); - rbd_dev->pool_name = NULL; + kfree(rbd_opts); + rbd_spec_put(spec); + kfree(options); - return err_ptr; + return ret; } /* @@ -2814,14 +3312,22 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev) void *p; /* + * When probing a parent image, the image id is already + * known (and the image name likely is not). There's no + * need to fetch the image id again in this case. + */ + if (rbd_dev->spec->image_id) + return 0; + + /* * First, see if the format 2 image id file exists, and if * so, get the image's persistent id from it. 
*/ - size = sizeof (RBD_ID_PREFIX) + rbd_dev->image_name_len; + size = sizeof (RBD_ID_PREFIX) + rbd_dev->spec->image_name_len; object_name = kmalloc(size, GFP_NOIO); if (!object_name) return -ENOMEM; - sprintf(object_name, "%s%s", RBD_ID_PREFIX, rbd_dev->image_name); + sprintf(object_name, "%s%s", RBD_ID_PREFIX, rbd_dev->spec->image_name); dout("rbd id object name is %s\n", object_name); /* Response will be an encoded string, which includes a length */ @@ -2841,17 +3347,18 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev) dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); if (ret < 0) goto out; + ret = 0; /* rbd_req_sync_exec() can return positive */ p = response; - rbd_dev->image_id = ceph_extract_encoded_string(&p, + rbd_dev->spec->image_id = ceph_extract_encoded_string(&p, p + RBD_IMAGE_ID_LEN_MAX, - &rbd_dev->image_id_len, + &rbd_dev->spec->image_id_len, GFP_NOIO); - if (IS_ERR(rbd_dev->image_id)) { - ret = PTR_ERR(rbd_dev->image_id); - rbd_dev->image_id = NULL; + if (IS_ERR(rbd_dev->spec->image_id)) { + ret = PTR_ERR(rbd_dev->spec->image_id); + rbd_dev->spec->image_id = NULL; } else { - dout("image_id is %s\n", rbd_dev->image_id); + dout("image_id is %s\n", rbd_dev->spec->image_id); } out: kfree(response); @@ -2867,26 +3374,33 @@ static int rbd_dev_v1_probe(struct rbd_device *rbd_dev) /* Version 1 images have no id; empty string is used */ - rbd_dev->image_id = kstrdup("", GFP_KERNEL); - if (!rbd_dev->image_id) + rbd_dev->spec->image_id = kstrdup("", GFP_KERNEL); + if (!rbd_dev->spec->image_id) return -ENOMEM; - rbd_dev->image_id_len = 0; + rbd_dev->spec->image_id_len = 0; /* Record the header object name for this rbd image. */ - size = rbd_dev->image_name_len + sizeof (RBD_SUFFIX); + size = rbd_dev->spec->image_name_len + sizeof (RBD_SUFFIX); rbd_dev->header_name = kmalloc(size, GFP_KERNEL); if (!rbd_dev->header_name) { ret = -ENOMEM; goto out_err; } - sprintf(rbd_dev->header_name, "%s%s", rbd_dev->image_name, RBD_SUFFIX); + sprintf(rbd_dev->header_name, "%s%s", + rbd_dev->spec->image_name, RBD_SUFFIX); /* Populate rbd image metadata */ ret = rbd_read_header(rbd_dev, &rbd_dev->header); if (ret < 0) goto out_err; + + /* Version 1 images have no parent (no layering) */ + + rbd_dev->parent_spec = NULL; + rbd_dev->parent_overlap = 0; + rbd_dev->image_format = 1; dout("discovered version 1 image, header name is %s\n", @@ -2897,8 +3411,8 @@ static int rbd_dev_v1_probe(struct rbd_device *rbd_dev) out_err: kfree(rbd_dev->header_name); rbd_dev->header_name = NULL; - kfree(rbd_dev->image_id); - rbd_dev->image_id = NULL; + kfree(rbd_dev->spec->image_id); + rbd_dev->spec->image_id = NULL; return ret; } @@ -2913,12 +3427,12 @@ static int rbd_dev_v2_probe(struct rbd_device *rbd_dev) * Image id was filled in by the caller. Record the header * object name for this rbd image. 
*/ - size = sizeof (RBD_HEADER_PREFIX) + rbd_dev->image_id_len; + size = sizeof (RBD_HEADER_PREFIX) + rbd_dev->spec->image_id_len; rbd_dev->header_name = kmalloc(size, GFP_KERNEL); if (!rbd_dev->header_name) return -ENOMEM; sprintf(rbd_dev->header_name, "%s%s", - RBD_HEADER_PREFIX, rbd_dev->image_id); + RBD_HEADER_PREFIX, rbd_dev->spec->image_id); /* Get the size and object order for the image */ @@ -2932,12 +3446,20 @@ static int rbd_dev_v2_probe(struct rbd_device *rbd_dev) if (ret < 0) goto out_err; - /* Get the features for the image */ + /* Get the and check features for the image */ ret = rbd_dev_v2_features(rbd_dev); if (ret < 0) goto out_err; + /* If the image supports layering, get the parent info */ + + if (rbd_dev->header.features & RBD_FEATURE_LAYERING) { + ret = rbd_dev_v2_parent_info(rbd_dev); + if (ret < 0) + goto out_err; + } + /* crypto and compression type aren't (yet) supported for v2 images */ rbd_dev->header.crypt_type = 0; @@ -2955,8 +3477,11 @@ static int rbd_dev_v2_probe(struct rbd_device *rbd_dev) dout("discovered version 2 image, header name is %s\n", rbd_dev->header_name); - return -ENOTSUPP; + return 0; out_err: + rbd_dev->parent_overlap = 0; + rbd_spec_put(rbd_dev->parent_spec); + rbd_dev->parent_spec = NULL; kfree(rbd_dev->header_name); rbd_dev->header_name = NULL; kfree(rbd_dev->header.object_prefix); @@ -2965,91 +3490,22 @@ out_err: return ret; } -/* - * Probe for the existence of the header object for the given rbd - * device. For format 2 images this includes determining the image - * id. - */ -static int rbd_dev_probe(struct rbd_device *rbd_dev) +static int rbd_dev_probe_finish(struct rbd_device *rbd_dev) { int ret; - /* - * Get the id from the image id object. If it's not a - * format 2 image, we'll get ENOENT back, and we'll assume - * it's a format 1 image. 
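The error path above drops the spec with rbd_spec_put() rather than kfree(): once parent images are in play a spec can be referenced from more than one place, so its lifetime is reference counted. A plausible shape for the helpers, assuming a kref-backed rbd_spec (the definition sits outside this excerpt):

        static void rbd_spec_free(struct kref *kref)
        {
                struct rbd_spec *spec = container_of(kref, struct rbd_spec, kref);

                kfree(spec->pool_name);
                kfree(spec->image_id);
                kfree(spec->image_name);
                kfree(spec->snap_name);
                kfree(spec);
        }

        void rbd_spec_put(struct rbd_spec *spec)
        {
                if (spec)
                        kref_put(&spec->kref, rbd_spec_free);
        }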
- */ - ret = rbd_dev_image_id(rbd_dev); - if (ret) - ret = rbd_dev_v1_probe(rbd_dev); - else - ret = rbd_dev_v2_probe(rbd_dev); + /* no need to lock here, as rbd_dev is not registered yet */ + ret = rbd_dev_snaps_update(rbd_dev); if (ret) - dout("probe failed, returning %d\n", ret); - - return ret; -} - -static ssize_t rbd_add(struct bus_type *bus, - const char *buf, - size_t count) -{ - char *options; - struct rbd_device *rbd_dev = NULL; - const char *mon_addrs = NULL; - size_t mon_addrs_size = 0; - struct ceph_osd_client *osdc; - int rc = -ENOMEM; - char *snap_name; - - if (!try_module_get(THIS_MODULE)) - return -ENODEV; - - options = kmalloc(count, GFP_KERNEL); - if (!options) - goto err_out_mem; - rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL); - if (!rbd_dev) - goto err_out_mem; - - /* static rbd_device initialization */ - spin_lock_init(&rbd_dev->lock); - INIT_LIST_HEAD(&rbd_dev->node); - INIT_LIST_HEAD(&rbd_dev->snaps); - init_rwsem(&rbd_dev->header_rwsem); - - /* parse add command */ - snap_name = rbd_add_parse_args(rbd_dev, buf, - &mon_addrs, &mon_addrs_size, options, count); - if (IS_ERR(snap_name)) { - rc = PTR_ERR(snap_name); - goto err_out_mem; - } - - rc = rbd_get_client(rbd_dev, mon_addrs, mon_addrs_size - 1, options); - if (rc < 0) - goto err_out_args; - - /* pick the pool */ - osdc = &rbd_dev->rbd_client->client->osdc; - rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->pool_name); - if (rc < 0) - goto err_out_client; - rbd_dev->pool_id = rc; - - rc = rbd_dev_probe(rbd_dev); - if (rc < 0) - goto err_out_client; - rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); + return ret; - /* no need to lock here, as rbd_dev is not registered yet */ - rc = rbd_dev_snaps_update(rbd_dev); - if (rc) - goto err_out_header; + ret = rbd_dev_probe_update_spec(rbd_dev); + if (ret) + goto err_out_snaps; - rc = rbd_dev_set_mapping(rbd_dev, snap_name); - if (rc) - goto err_out_header; + ret = rbd_dev_set_mapping(rbd_dev); + if (ret) + goto err_out_snaps; /* generate unique id: find highest unique id, add one */ rbd_dev_id_get(rbd_dev); @@ -3061,34 +3517,33 @@ static ssize_t rbd_add(struct bus_type *bus, /* Get our block major device number. */ - rc = register_blkdev(0, rbd_dev->name); - if (rc < 0) + ret = register_blkdev(0, rbd_dev->name); + if (ret < 0) goto err_out_id; - rbd_dev->major = rc; + rbd_dev->major = ret; /* Set up the blkdev mapping. */ - rc = rbd_init_disk(rbd_dev); - if (rc) + ret = rbd_init_disk(rbd_dev); + if (ret) goto err_out_blkdev; - rc = rbd_bus_add_dev(rbd_dev); - if (rc) + ret = rbd_bus_add_dev(rbd_dev); + if (ret) goto err_out_disk; /* * At this point cleanup in the event of an error is the job * of the sysfs code (initiated by rbd_bus_del_dev()). */ - down_write(&rbd_dev->header_rwsem); - rc = rbd_dev_snaps_register(rbd_dev); + ret = rbd_dev_snaps_register(rbd_dev); up_write(&rbd_dev->header_rwsem); - if (rc) + if (ret) goto err_out_bus; - rc = rbd_init_watch_dev(rbd_dev); - if (rc) + ret = rbd_init_watch_dev(rbd_dev); + if (ret) goto err_out_bus; /* Everything's ready. Announce the disk to the world. 
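A note on the major-number handling above: passing 0 to register_blkdev() requests a dynamically allocated major, and the positive return value is that major. That is why it is saved in rbd_dev->major and must be handed back on every later error path. In isolation:

        int major;

        major = register_blkdev(0, "rbd");      /* 0 => pick a free major */
        if (major < 0)
                return major;
        /* ... device lives ... */
        unregister_blkdev(major, "rbd");        /* mirrored on teardown */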
*/ @@ -3098,37 +3553,119 @@ static ssize_t rbd_add(struct bus_type *bus, pr_info("%s: added with size 0x%llx\n", rbd_dev->disk->disk_name, (unsigned long long) rbd_dev->mapping.size); - return count; - + return ret; err_out_bus: /* this will also clean up rest of rbd_dev stuff */ rbd_bus_del_dev(rbd_dev); - kfree(options); - return rc; + return ret; err_out_disk: rbd_free_disk(rbd_dev); err_out_blkdev: unregister_blkdev(rbd_dev->major, rbd_dev->name); err_out_id: rbd_dev_id_put(rbd_dev); -err_out_header: - rbd_header_free(&rbd_dev->header); +err_out_snaps: + rbd_remove_all_snaps(rbd_dev); + + return ret; +} + +/* + * Probe for the existence of the header object for the given rbd + * device. For format 2 images this includes determining the image + * id. + */ +static int rbd_dev_probe(struct rbd_device *rbd_dev) +{ + int ret; + + /* + * Get the id from the image id object. If it's not a + * format 2 image, we'll get ENOENT back, and we'll assume + * it's a format 1 image. + */ + ret = rbd_dev_image_id(rbd_dev); + if (ret) + ret = rbd_dev_v1_probe(rbd_dev); + else + ret = rbd_dev_v2_probe(rbd_dev); + if (ret) { + dout("probe failed, returning %d\n", ret); + + return ret; + } + + ret = rbd_dev_probe_finish(rbd_dev); + if (ret) + rbd_header_free(&rbd_dev->header); + + return ret; +} + +static ssize_t rbd_add(struct bus_type *bus, + const char *buf, + size_t count) +{ + struct rbd_device *rbd_dev = NULL; + struct ceph_options *ceph_opts = NULL; + struct rbd_options *rbd_opts = NULL; + struct rbd_spec *spec = NULL; + struct rbd_client *rbdc; + struct ceph_osd_client *osdc; + int rc = -ENOMEM; + + if (!try_module_get(THIS_MODULE)) + return -ENODEV; + + /* parse add command */ + rc = rbd_add_parse_args(buf, &ceph_opts, &rbd_opts, &spec); + if (rc < 0) + goto err_out_module; + + rbdc = rbd_get_client(ceph_opts); + if (IS_ERR(rbdc)) { + rc = PTR_ERR(rbdc); + goto err_out_args; + } + ceph_opts = NULL; /* rbd_dev client now owns this */ + + /* pick the pool */ + osdc = &rbdc->client->osdc; + rc = ceph_pg_poolid_by_name(osdc->osdmap, spec->pool_name); + if (rc < 0) + goto err_out_client; + spec->pool_id = (u64) rc; + + rbd_dev = rbd_dev_create(rbdc, spec); + if (!rbd_dev) + goto err_out_client; + rbdc = NULL; /* rbd_dev now owns this */ + spec = NULL; /* rbd_dev now owns this */ + + rbd_dev->mapping.read_only = rbd_opts->read_only; + kfree(rbd_opts); + rbd_opts = NULL; /* done with this */ + + rc = rbd_dev_probe(rbd_dev); + if (rc < 0) + goto err_out_rbd_dev; + + return count; +err_out_rbd_dev: + rbd_dev_destroy(rbd_dev); err_out_client: - kfree(rbd_dev->header_name); - rbd_put_client(rbd_dev); - kfree(rbd_dev->image_id); + rbd_put_client(rbdc); err_out_args: - kfree(rbd_dev->mapping.snap_name); - kfree(rbd_dev->image_name); - kfree(rbd_dev->pool_name); -err_out_mem: - kfree(rbd_dev); - kfree(options); + if (ceph_opts) + ceph_destroy_options(ceph_opts); + kfree(rbd_opts); + rbd_spec_put(spec); +err_out_module: + module_put(THIS_MODULE); dout("Error adding device %s\n", buf); - module_put(THIS_MODULE); return (ssize_t) rc; } @@ -3163,7 +3700,6 @@ static void rbd_dev_release(struct device *dev) if (rbd_dev->watch_event) rbd_req_sync_unwatch(rbd_dev); - rbd_put_client(rbd_dev); /* clean up and free blkdev */ rbd_free_disk(rbd_dev); @@ -3173,13 +3709,9 @@ static void rbd_dev_release(struct device *dev) rbd_header_free(&rbd_dev->header); /* done with the id, and with the rbd_dev */ - kfree(rbd_dev->mapping.snap_name); - kfree(rbd_dev->image_id); - kfree(rbd_dev->header_name); - kfree(rbd_dev->pool_name); - 
kfree(rbd_dev->image_name); rbd_dev_id_put(rbd_dev); - kfree(rbd_dev); + rbd_assert(rbd_dev->rbd_client != NULL); + rbd_dev_destroy(rbd_dev); /* release module ref */ module_put(THIS_MODULE); @@ -3211,7 +3743,12 @@ static ssize_t rbd_remove(struct bus_type *bus, goto done; } - __rbd_remove_all_snaps(rbd_dev); + if (rbd_dev->open_count) { + ret = -EBUSY; + goto done; + } + + rbd_remove_all_snaps(rbd_dev); rbd_bus_del_dev(rbd_dev); done: diff --git a/drivers/block/rbd_types.h b/drivers/block/rbd_types.h index cbe77fa105ba..49d77cbcf8bd 100644 --- a/drivers/block/rbd_types.h +++ b/drivers/block/rbd_types.h @@ -46,8 +46,6 @@ #define RBD_MIN_OBJ_ORDER 16 #define RBD_MAX_OBJ_ORDER 30 -#define RBD_MAX_SEG_NAME_LEN 128 - #define RBD_COMP_NONE 0 #define RBD_CRYPT_NONE 0 diff --git a/drivers/char/random.c b/drivers/char/random.c index b86eae9b77df..85e81ec1451e 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -399,7 +399,6 @@ static DECLARE_WAIT_QUEUE_HEAD(random_read_wait); static DECLARE_WAIT_QUEUE_HEAD(random_write_wait); static struct fasync_struct *fasync; -#if 0 static bool debug; module_param(debug, bool, 0644); #define DEBUG_ENT(fmt, arg...) do { \ @@ -410,9 +409,6 @@ module_param(debug, bool, 0644); blocking_pool.entropy_count,\ nonblocking_pool.entropy_count,\ ## arg); } while (0) -#else -#define DEBUG_ENT(fmt, arg...) do {} while (0) -#endif /********************************************************************** * @@ -437,6 +433,7 @@ struct entropy_store { int entropy_count; int entropy_total; unsigned int initialized:1; + bool last_data_init; __u8 last_data[EXTRACT_SIZE]; }; @@ -829,7 +826,7 @@ static void xfer_secondary_pool(struct entropy_store *r, size_t nbytes) bytes = min_t(int, bytes, sizeof(tmp)); DEBUG_ENT("going to reseed %s with %d bits " - "(%d of %d requested)\n", + "(%zu of %d requested)\n", r->name, bytes * 8, nbytes * 8, r->entropy_count); bytes = extract_entropy(r->pull, tmp, bytes, @@ -860,7 +857,7 @@ static size_t account(struct entropy_store *r, size_t nbytes, int min, spin_lock_irqsave(&r->lock, flags); BUG_ON(r->entropy_count > r->poolinfo->POOLBITS); - DEBUG_ENT("trying to extract %d bits from %s\n", + DEBUG_ENT("trying to extract %zu bits from %s\n", nbytes * 8, r->name); /* Can we pull enough? */ @@ -882,7 +879,7 @@ static size_t account(struct entropy_store *r, size_t nbytes, int min, } } - DEBUG_ENT("debiting %d entropy credits from %s%s\n", + DEBUG_ENT("debiting %zu entropy credits from %s%s\n", nbytes * 8, r->name, r->limit ? 
"" : " (unlimited)"); spin_unlock_irqrestore(&r->lock, flags); @@ -957,6 +954,10 @@ static ssize_t extract_entropy(struct entropy_store *r, void *buf, ssize_t ret = 0, i; __u8 tmp[EXTRACT_SIZE]; + /* if last_data isn't primed, we need EXTRACT_SIZE extra bytes */ + if (fips_enabled && !r->last_data_init) + nbytes += EXTRACT_SIZE; + trace_extract_entropy(r->name, nbytes, r->entropy_count, _RET_IP_); xfer_secondary_pool(r, nbytes); nbytes = account(r, nbytes, min, reserved); @@ -967,6 +968,17 @@ static ssize_t extract_entropy(struct entropy_store *r, void *buf, if (fips_enabled) { unsigned long flags; + + /* prime last_data value if need be, per fips 140-2 */ + if (!r->last_data_init) { + spin_lock_irqsave(&r->lock, flags); + memcpy(r->last_data, tmp, EXTRACT_SIZE); + r->last_data_init = true; + nbytes -= EXTRACT_SIZE; + spin_unlock_irqrestore(&r->lock, flags); + extract_buf(r, tmp); + } + spin_lock_irqsave(&r->lock, flags); if (!memcmp(tmp, r->last_data, EXTRACT_SIZE)) panic("Hardware RNG duplicated output!\n"); @@ -1086,6 +1098,7 @@ static void init_std_data(struct entropy_store *r) r->entropy_count = 0; r->entropy_total = 0; + r->last_data_init = false; mix_pool_bytes(r, &now, sizeof(now), NULL); for (i = r->poolinfo->POOLBYTES; i > 0; i -= sizeof(rv)) { if (!arch_get_random_long(&rv)) @@ -1142,11 +1155,16 @@ random_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) if (n > SEC_XFER_SIZE) n = SEC_XFER_SIZE; - DEBUG_ENT("reading %d bits\n", n*8); + DEBUG_ENT("reading %zu bits\n", n*8); n = extract_entropy_user(&blocking_pool, buf, n); - DEBUG_ENT("read got %d bits (%d still needed)\n", + if (n < 0) { + retval = n; + break; + } + + DEBUG_ENT("read got %zd bits (%zd still needed)\n", n*8, (nbytes-n)*8); if (n == 0) { @@ -1171,10 +1189,6 @@ random_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) continue; } - if (n < 0) { - retval = n; - break; - } count += n; buf += n; nbytes -= n; diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 90493d4ead1f..c594cb16c37b 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -37,8 +37,12 @@ #include <linux/wait.h> #include <linux/workqueue.h> #include <linux/module.h> +#include <linux/dma-mapping.h> +#include <linux/kconfig.h> #include "../tty/hvc/hvc_console.h" +#define is_rproc_enabled IS_ENABLED(CONFIG_REMOTEPROC) + /* * This is a global struct for storing common data for all the devices * this driver handles. @@ -111,6 +115,21 @@ struct port_buffer { size_t len; /* offset in the buf from which to consume data */ size_t offset; + + /* DMA address of buffer */ + dma_addr_t dma; + + /* Device we got DMA memory from */ + struct device *dev; + + /* List of pending dma buffers to free */ + struct list_head list; + + /* If sgpages == 0 then buf is used */ + unsigned int sgpages; + + /* sg is used if spages > 0. 
sg must be the last in is struct */ + struct scatterlist sg[0]; }; /* @@ -325,6 +344,11 @@ static bool is_console_port(struct port *port) return false; } +static bool is_rproc_serial(const struct virtio_device *vdev) +{ + return is_rproc_enabled && vdev->id.device == VIRTIO_ID_RPROC_SERIAL; +} + static inline bool use_multiport(struct ports_device *portdev) { /* @@ -336,20 +360,110 @@ static inline bool use_multiport(struct ports_device *portdev) return portdev->vdev->features[0] & (1 << VIRTIO_CONSOLE_F_MULTIPORT); } -static void free_buf(struct port_buffer *buf) +static DEFINE_SPINLOCK(dma_bufs_lock); +static LIST_HEAD(pending_free_dma_bufs); + +static void free_buf(struct port_buffer *buf, bool can_sleep) { - kfree(buf->buf); + unsigned int i; + + for (i = 0; i < buf->sgpages; i++) { + struct page *page = sg_page(&buf->sg[i]); + if (!page) + break; + put_page(page); + } + + if (!buf->dev) { + kfree(buf->buf); + } else if (is_rproc_enabled) { + unsigned long flags; + + /* dma_free_coherent requires interrupts to be enabled. */ + if (!can_sleep) { + /* queue up dma-buffers to be freed later */ + spin_lock_irqsave(&dma_bufs_lock, flags); + list_add_tail(&buf->list, &pending_free_dma_bufs); + spin_unlock_irqrestore(&dma_bufs_lock, flags); + return; + } + dma_free_coherent(buf->dev, buf->size, buf->buf, buf->dma); + + /* Release device refcnt and allow it to be freed */ + put_device(buf->dev); + } + kfree(buf); } -static struct port_buffer *alloc_buf(size_t buf_size) +static void reclaim_dma_bufs(void) +{ + unsigned long flags; + struct port_buffer *buf, *tmp; + LIST_HEAD(tmp_list); + + if (list_empty(&pending_free_dma_bufs)) + return; + + /* Create a copy of the pending_free_dma_bufs while holding the lock */ + spin_lock_irqsave(&dma_bufs_lock, flags); + list_cut_position(&tmp_list, &pending_free_dma_bufs, + pending_free_dma_bufs.prev); + spin_unlock_irqrestore(&dma_bufs_lock, flags); + + /* Release the dma buffers, without irqs enabled */ + list_for_each_entry_safe(buf, tmp, &tmp_list, list) { + list_del(&buf->list); + free_buf(buf, true); + } +} + +static struct port_buffer *alloc_buf(struct virtqueue *vq, size_t buf_size, + int pages) { struct port_buffer *buf; - buf = kmalloc(sizeof(*buf), GFP_KERNEL); + reclaim_dma_bufs(); + + /* + * Allocate buffer and the sg list. The sg list array is allocated + * directly after the port_buffer struct. + */ + buf = kmalloc(sizeof(*buf) + sizeof(struct scatterlist) * pages, + GFP_KERNEL); if (!buf) goto fail; - buf->buf = kzalloc(buf_size, GFP_KERNEL); + + buf->sgpages = pages; + if (pages > 0) { + buf->dev = NULL; + buf->buf = NULL; + return buf; + } + + if (is_rproc_serial(vq->vdev)) { + /* + * Allocate DMA memory from ancestor. When a virtio + * device is created by remoteproc, the DMA memory is + * associated with the grandparent device: + * vdev => rproc => platform-dev. 
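The can_sleep flag free_buf() grows here exists because dma_free_coherent() cannot be called in atomic context: non-sleeping callers park rproc buffers on pending_free_dma_bufs, and reclaim_dma_bufs() frees them later, detaching the whole list under the lock and freeing outside it:

        struct port_buffer *buf, *tmp;
        unsigned long flags;
        LIST_HEAD(tmp_list);

        spin_lock_irqsave(&dma_bufs_lock, flags);
        /* O(1): splice every pending buffer onto the private list */
        list_cut_position(&tmp_list, &pending_free_dma_bufs,
                          pending_free_dma_bufs.prev);
        spin_unlock_irqrestore(&dma_bufs_lock, flags);

        list_for_each_entry_safe(buf, tmp, &tmp_list, list) {
                list_del(&buf->list);
                free_buf(buf, true);    /* now safe to free coherent memory */
        }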
+ * The code here would have been less quirky if + * DMA_MEMORY_INCLUDES_CHILDREN had been supported + * in dma-coherent.c + */ + if (!vq->vdev->dev.parent || !vq->vdev->dev.parent->parent) + goto free_buf; + buf->dev = vq->vdev->dev.parent->parent; + + /* Increase device refcnt to avoid freeing it */ + get_device(buf->dev); + buf->buf = dma_alloc_coherent(buf->dev, buf_size, &buf->dma, + GFP_KERNEL); + } else { + buf->dev = NULL; + buf->buf = kmalloc(buf_size, GFP_KERNEL); + } + if (!buf->buf) goto free_buf; buf->len = 0; @@ -396,6 +510,8 @@ static int add_inbuf(struct virtqueue *vq, struct port_buffer *buf) ret = virtqueue_add_buf(vq, sg, 0, 1, buf, GFP_ATOMIC); virtqueue_kick(vq); + if (!ret) + ret = vq->num_free; return ret; } @@ -416,7 +532,7 @@ static void discard_port_data(struct port *port) port->stats.bytes_discarded += buf->len - buf->offset; if (add_inbuf(port->in_vq, buf) < 0) { err++; - free_buf(buf); + free_buf(buf, false); } port->inbuf = NULL; buf = get_inbuf(port); @@ -459,7 +575,7 @@ static ssize_t __send_control_msg(struct ports_device *portdev, u32 port_id, vq = portdev->c_ovq; sg_init_one(sg, &cpkt, sizeof(cpkt)); - if (virtqueue_add_buf(vq, sg, 1, 0, &cpkt, GFP_ATOMIC) >= 0) { + if (virtqueue_add_buf(vq, sg, 1, 0, &cpkt, GFP_ATOMIC) == 0) { virtqueue_kick(vq); while (!virtqueue_get_buf(vq, &len)) cpu_relax(); @@ -476,55 +592,29 @@ static ssize_t send_control_msg(struct port *port, unsigned int event, return 0; } -struct buffer_token { - union { - void *buf; - struct scatterlist *sg; - } u; - /* If sgpages == 0 then buf is used, else sg is used */ - unsigned int sgpages; -}; - -static void reclaim_sg_pages(struct scatterlist *sg, unsigned int nrpages) -{ - int i; - struct page *page; - - for (i = 0; i < nrpages; i++) { - page = sg_page(&sg[i]); - if (!page) - break; - put_page(page); - } - kfree(sg); -} /* Callers must take the port->outvq_lock */ static void reclaim_consumed_buffers(struct port *port) { - struct buffer_token *tok; + struct port_buffer *buf; unsigned int len; if (!port->portdev) { /* Device has been unplugged. vqs are already gone. */ return; } - while ((tok = virtqueue_get_buf(port->out_vq, &len))) { - if (tok->sgpages) - reclaim_sg_pages(tok->u.sg, tok->sgpages); - else - kfree(tok->u.buf); - kfree(tok); + while ((buf = virtqueue_get_buf(port->out_vq, &len))) { + free_buf(buf, false); port->outvq_full = false; } } static ssize_t __send_to_port(struct port *port, struct scatterlist *sg, int nents, size_t in_count, - struct buffer_token *tok, bool nonblock) + void *data, bool nonblock) { struct virtqueue *out_vq; - ssize_t ret; + int err; unsigned long flags; unsigned int len; @@ -534,17 +624,17 @@ static ssize_t __send_to_port(struct port *port, struct scatterlist *sg, reclaim_consumed_buffers(port); - ret = virtqueue_add_buf(out_vq, sg, nents, 0, tok, GFP_ATOMIC); + err = virtqueue_add_buf(out_vq, sg, nents, 0, data, GFP_ATOMIC); /* Tell Host to go! 
*/ virtqueue_kick(out_vq); - if (ret < 0) { + if (err) { in_count = 0; goto done; } - if (ret == 0) + if (out_vq->num_free == 0) port->outvq_full = true; if (nonblock) @@ -572,37 +662,6 @@ done: return in_count; } -static ssize_t send_buf(struct port *port, void *in_buf, size_t in_count, - bool nonblock) -{ - struct scatterlist sg[1]; - struct buffer_token *tok; - - tok = kmalloc(sizeof(*tok), GFP_ATOMIC); - if (!tok) - return -ENOMEM; - tok->sgpages = 0; - tok->u.buf = in_buf; - - sg_init_one(sg, in_buf, in_count); - - return __send_to_port(port, sg, 1, in_count, tok, nonblock); -} - -static ssize_t send_pages(struct port *port, struct scatterlist *sg, int nents, - size_t in_count, bool nonblock) -{ - struct buffer_token *tok; - - tok = kmalloc(sizeof(*tok), GFP_ATOMIC); - if (!tok) - return -ENOMEM; - tok->sgpages = nents; - tok->u.sg = sg; - - return __send_to_port(port, sg, nents, in_count, tok, nonblock); -} - /* * Give out the data that's requested from the buffer that we have * queued up. @@ -748,9 +807,10 @@ static ssize_t port_fops_write(struct file *filp, const char __user *ubuf, size_t count, loff_t *offp) { struct port *port; - char *buf; + struct port_buffer *buf; ssize_t ret; bool nonblock; + struct scatterlist sg[1]; /* Userspace could be out to fool us */ if (!count) @@ -766,11 +826,11 @@ static ssize_t port_fops_write(struct file *filp, const char __user *ubuf, count = min((size_t)(32 * 1024), count); - buf = kmalloc(count, GFP_KERNEL); + buf = alloc_buf(port->out_vq, count, 0); if (!buf) return -ENOMEM; - ret = copy_from_user(buf, ubuf, count); + ret = copy_from_user(buf->buf, ubuf, count); if (ret) { ret = -EFAULT; goto free_buf; @@ -784,13 +844,14 @@ static ssize_t port_fops_write(struct file *filp, const char __user *ubuf, * through to the host. */ nonblock = true; - ret = send_buf(port, buf, count, nonblock); + sg_init_one(sg, buf->buf, count); + ret = __send_to_port(port, sg, 1, count, buf, nonblock); if (nonblock && ret > 0) goto out; free_buf: - kfree(buf); + free_buf(buf, true); out: return ret; } @@ -856,6 +917,7 @@ static ssize_t port_fops_splice_write(struct pipe_inode_info *pipe, struct port *port = filp->private_data; struct sg_list sgl; ssize_t ret; + struct port_buffer *buf; struct splice_desc sd = { .total_len = len, .flags = flags, @@ -863,22 +925,34 @@ static ssize_t port_fops_splice_write(struct pipe_inode_info *pipe, .u.data = &sgl, }; + /* + * Rproc_serial does not yet support splice. To support splice + * pipe_to_sg() must allocate dma-buffers and copy content from + * regular pages to dma pages. And alloc_buf and free_buf must + * support allocating and freeing such a list of dma-buffers. 
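Worth noting for the rework above: virtqueue_add_buf() now returns 0 on success and a negative errno on failure, rather than the ring's remaining capacity, so fullness is read from vq->num_free. The send path condenses to:

        sg_init_one(sg, buf->buf, count);       /* one outgoing sg entry */
        err = virtqueue_add_buf(out_vq, sg, 1, 0, buf, GFP_ATOMIC);
        virtqueue_kick(out_vq);
        if (err)
                goto done;                      /* nothing was queued */
        if (out_vq->num_free == 0)
                port->outvq_full = true;        /* throttle later writers */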
+ */ + if (is_rproc_serial(port->out_vq->vdev)) + return -EINVAL; + ret = wait_port_writable(port, filp->f_flags & O_NONBLOCK); if (ret < 0) return ret; + buf = alloc_buf(port->out_vq, 0, pipe->nrbufs); + if (!buf) + return -ENOMEM; + sgl.n = 0; sgl.len = 0; sgl.size = pipe->nrbufs; - sgl.sg = kmalloc(sizeof(struct scatterlist) * sgl.size, GFP_KERNEL); - if (unlikely(!sgl.sg)) - return -ENOMEM; - + sgl.sg = buf->sg; sg_init_table(sgl.sg, sgl.size); ret = __splice_from_pipe(pipe, &sd, pipe_to_sg); if (likely(ret > 0)) - ret = send_pages(port, sgl.sg, sgl.n, sgl.len, true); + ret = __send_to_port(port, buf->sg, sgl.n, sgl.len, buf, true); + if (unlikely(ret <= 0)) + free_buf(buf, true); return ret; } @@ -927,6 +1001,7 @@ static int port_fops_release(struct inode *inode, struct file *filp) reclaim_consumed_buffers(port); spin_unlock_irq(&port->outvq_lock); + reclaim_dma_bufs(); /* * Locks aren't necessary here as a port can't be opened after * unplug, and if a port isn't unplugged, a kref would already @@ -1031,6 +1106,7 @@ static const struct file_operations port_fops = { static int put_chars(u32 vtermno, const char *buf, int count) { struct port *port; + struct scatterlist sg[1]; if (unlikely(early_put_chars)) return early_put_chars(vtermno, buf, count); @@ -1039,7 +1115,8 @@ static int put_chars(u32 vtermno, const char *buf, int count) if (!port) return -EPIPE; - return send_buf(port, (void *)buf, count, false); + sg_init_one(sg, buf, count); + return __send_to_port(port, sg, 1, count, (void *)buf, false); } /* @@ -1076,7 +1153,10 @@ static void resize_console(struct port *port) return; vdev = port->portdev->vdev; - if (virtio_has_feature(vdev, VIRTIO_CONSOLE_F_SIZE)) + + /* Don't test F_SIZE at all if we're rproc: not a valid feature! */ + if (!is_rproc_serial(vdev) && + virtio_has_feature(vdev, VIRTIO_CONSOLE_F_SIZE)) hvc_resize(port->cons.hvc, port->cons.ws); } @@ -1260,7 +1340,7 @@ static unsigned int fill_queue(struct virtqueue *vq, spinlock_t *lock) nr_added_bufs = 0; do { - buf = alloc_buf(PAGE_SIZE); + buf = alloc_buf(vq, PAGE_SIZE, 0); if (!buf) break; @@ -1268,7 +1348,7 @@ static unsigned int fill_queue(struct virtqueue *vq, spinlock_t *lock) ret = add_inbuf(vq, buf); if (ret < 0) { spin_unlock_irq(lock); - free_buf(buf); + free_buf(buf, true); break; } nr_added_bufs++; @@ -1356,10 +1436,18 @@ static int add_port(struct ports_device *portdev, u32 id) goto free_device; } - /* - * If we're not using multiport support, this has to be a console port - */ - if (!use_multiport(port->portdev)) { + if (is_rproc_serial(port->portdev->vdev)) + /* + * For rproc_serial assume remote processor is connected. + * rproc_serial does not want the console port, only + * the generic port implementation. + */ + port->host_connected = true; + else if (!use_multiport(port->portdev)) { + /* + * If we're not using multiport support, + * this has to be a console port. + */ err = init_port_console(port); if (err) goto free_inbufs; @@ -1392,7 +1480,7 @@ static int add_port(struct ports_device *portdev, u32 id) free_inbufs: while ((buf = virtqueue_detach_unused_buf(port->in_vq))) - free_buf(buf); + free_buf(buf, true); free_device: device_destroy(pdrvdata.class, port->dev->devt); free_cdev: @@ -1434,7 +1522,11 @@ static void remove_port_data(struct port *port) /* Remove buffers we queued up for the Host to send us data in. */ while ((buf = virtqueue_detach_unused_buf(port->in_vq))) - free_buf(buf); + free_buf(buf, true); + + /* Free pending buffers from the out-queue. 
*/ + while ((buf = virtqueue_detach_unused_buf(port->out_vq))) + free_buf(buf, true); } /* @@ -1636,7 +1728,7 @@ static void control_work_handler(struct work_struct *work) if (add_inbuf(portdev->c_ivq, buf) < 0) { dev_warn(&portdev->vdev->dev, "Error adding buffer to queue\n"); - free_buf(buf); + free_buf(buf, false); } } spin_unlock(&portdev->cvq_lock); @@ -1832,10 +1924,10 @@ static void remove_controlq_data(struct ports_device *portdev) return; while ((buf = virtqueue_get_buf(portdev->c_ivq, &len))) - free_buf(buf); + free_buf(buf, true); while ((buf = virtqueue_detach_unused_buf(portdev->c_ivq))) - free_buf(buf); + free_buf(buf, true); } /* @@ -1882,11 +1974,15 @@ static int virtcons_probe(struct virtio_device *vdev) multiport = false; portdev->config.max_nr_ports = 1; - if (virtio_config_val(vdev, VIRTIO_CONSOLE_F_MULTIPORT, - offsetof(struct virtio_console_config, - max_nr_ports), - &portdev->config.max_nr_ports) == 0) + + /* Don't test MULTIPORT at all if we're rproc: not a valid feature! */ + if (!is_rproc_serial(vdev) && + virtio_config_val(vdev, VIRTIO_CONSOLE_F_MULTIPORT, + offsetof(struct virtio_console_config, + max_nr_ports), + &portdev->config.max_nr_ports) == 0) { multiport = true; + } err = init_vqs(portdev); if (err < 0) { @@ -1996,6 +2092,16 @@ static unsigned int features[] = { VIRTIO_CONSOLE_F_MULTIPORT, }; +static struct virtio_device_id rproc_serial_id_table[] = { +#if IS_ENABLED(CONFIG_REMOTEPROC) + { VIRTIO_ID_RPROC_SERIAL, VIRTIO_DEV_ANY_ID }, +#endif + { 0 }, +}; + +static unsigned int rproc_serial_features[] = { +}; + #ifdef CONFIG_PM static int virtcons_freeze(struct virtio_device *vdev) { @@ -2080,6 +2186,20 @@ static struct virtio_driver virtio_console = { #endif }; +/* + * virtio_rproc_serial refers to __devinit function which causes + * section mismatch warnings. So use __refdata to silence warnings. 
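is_rproc_serial() relies on IS_ENABLED() evaluating to a compile-time 0 or 1: with CONFIG_REMOTEPROC off, the && short-circuits to constant false and every rproc branch is discarded as dead code, yet all of it still gets compiled and type-checked:

        #define is_rproc_enabled IS_ENABLED(CONFIG_REMOTEPROC)

        static bool is_rproc_serial(const struct virtio_device *vdev)
        {
                /* folds to "return false" when REMOTEPROC is not configured */
                return is_rproc_enabled && vdev->id.device == VIRTIO_ID_RPROC_SERIAL;
        }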
+ */ +static struct virtio_driver __refdata virtio_rproc_serial = { + .feature_table = rproc_serial_features, + .feature_table_size = ARRAY_SIZE(rproc_serial_features), + .driver.name = "virtio_rproc_serial", + .driver.owner = THIS_MODULE, + .id_table = rproc_serial_id_table, + .probe = virtcons_probe, + .remove = virtcons_remove, +}; + static int __init init(void) { int err; @@ -2104,7 +2224,15 @@ static int __init init(void) pr_err("Error %d registering virtio driver\n", err); goto free; } + err = register_virtio_driver(&virtio_rproc_serial); + if (err < 0) { + pr_err("Error %d registering virtio rproc serial driver\n", + err); + goto unregister; + } return 0; +unregister: + unregister_virtio_driver(&virtio_console); free: if (pdrvdata.debugfs_dir) debugfs_remove_recursive(pdrvdata.debugfs_dir); @@ -2114,7 +2242,10 @@ free: static void __exit fini(void) { + reclaim_dma_bufs(); + unregister_virtio_driver(&virtio_console); + unregister_virtio_driver(&virtio_rproc_serial); class_destroy(pdrvdata.class); if (pdrvdata.debugfs_dir) diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index b298158cb922..fd3ae6290d71 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -16,6 +16,7 @@ */ static char dmi_empty_string[] = " "; +static u16 __initdata dmi_ver; /* * Catch too early calls to dmi_check_system(): */ @@ -118,12 +119,12 @@ static int __init dmi_walk_early(void (*decode)(const struct dmi_header *, return 0; } -static int __init dmi_checksum(const u8 *buf) +static int __init dmi_checksum(const u8 *buf, u8 len) { u8 sum = 0; int a; - for (a = 0; a < 15; a++) + for (a = 0; a < len; a++) sum += buf[a]; return sum == 0; @@ -161,8 +162,10 @@ static void __init dmi_save_uuid(const struct dmi_header *dm, int slot, int inde return; for (i = 0; i < 16 && (is_ff || is_00); i++) { - if(d[i] != 0x00) is_ff = 0; - if(d[i] != 0xFF) is_00 = 0; + if (d[i] != 0x00) + is_00 = 0; + if (d[i] != 0xFF) + is_ff = 0; } if (is_ff || is_00) @@ -172,7 +175,15 @@ static void __init dmi_save_uuid(const struct dmi_header *dm, int slot, int inde if (!s) return; - sprintf(s, "%pUB", d); + /* + * As of version 2.6 of the SMBIOS specification, the first 3 fields of + * the UUID are supposed to be little-endian encoded. The specification + * says that this is the defacto standard. + */ + if (dmi_ver >= 0x0206) + sprintf(s, "%pUL", d); + else + sprintf(s, "%pUB", d); dmi_ident[slot] = s; } @@ -404,29 +415,57 @@ static int __init dmi_present(const char __iomem *p) u8 buf[15]; memcpy_fromio(buf, p, 15); - if ((memcmp(buf, "_DMI_", 5) == 0) && dmi_checksum(buf)) { + if (dmi_checksum(buf, 15)) { dmi_num = (buf[13] << 8) | buf[12]; dmi_len = (buf[7] << 8) | buf[6]; dmi_base = (buf[11] << 24) | (buf[10] << 16) | (buf[9] << 8) | buf[8]; - /* - * DMI version 0.0 means that the real version is taken from - * the SMBIOS version, which we don't know at this point. 
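Concretely, the dmi_save_uuid() change above only reorders the first three UUID fields, which SMBIOS 2.6 declares little-endian; the remaining fields are byte arrays and stay put. For raw bytes 00 11 22 33 44 55 66 77 88 99 aa bb cc dd ee ff:

        "%pUB" (big-endian, pre-2.6):           00112233-4455-6677-8899-AABBCCDDEEFF
        "%pUL" (little-endian, SMBIOS >= 2.6):  33221100-5544-7766-8899-AABBCCDDEEFF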
- */ - if (buf[14] != 0) - printk(KERN_INFO "DMI %d.%d present.\n", - buf[14] >> 4, buf[14] & 0xF); - else - printk(KERN_INFO "DMI present.\n"); if (dmi_walk_early(dmi_decode) == 0) { + if (dmi_ver) + pr_info("SMBIOS %d.%d present.\n", + dmi_ver >> 8, dmi_ver & 0xFF); + else { + dmi_ver = (buf[14] & 0xF0) << 4 | + (buf[14] & 0x0F); + pr_info("Legacy DMI %d.%d present.\n", + dmi_ver >> 8, dmi_ver & 0xFF); + } dmi_dump_ids(); return 0; } } + dmi_ver = 0; return 1; } +static int __init smbios_present(const char __iomem *p) +{ + u8 buf[32]; + int offset = 0; + + memcpy_fromio(buf, p, 32); + if ((buf[5] < 32) && dmi_checksum(buf, buf[5])) { + dmi_ver = (buf[6] << 8) + buf[7]; + + /* Some BIOS report weird SMBIOS version, fix that up */ + switch (dmi_ver) { + case 0x021F: + case 0x0221: + pr_debug("SMBIOS version fixup(2.%d->2.%d)\n", + dmi_ver & 0xFF, 3); + dmi_ver = 0x0203; + break; + case 0x0233: + pr_debug("SMBIOS version fixup(2.%d->2.%d)\n", 51, 6); + dmi_ver = 0x0206; + break; + } + offset = 16; + } + return dmi_present(buf + offset); +} + void __init dmi_scan_machine(void) { char __iomem *p, *q; @@ -444,7 +483,7 @@ void __init dmi_scan_machine(void) if (p == NULL) goto error; - rc = dmi_present(p + 0x10); /* offset of _DMI_ string */ + rc = smbios_present(p); dmi_iounmap(p, 32); if (!rc) { dmi_available = 1; @@ -462,7 +501,12 @@ void __init dmi_scan_machine(void) goto error; for (q = p; q < p + 0x10000; q += 16) { - rc = dmi_present(q); + if (memcmp(q, "_SM_", 4) == 0 && q - p <= 0xFFE0) + rc = smbios_present(q); + else if (memcmp(q, "_DMI_", 5) == 0) + rc = dmi_present(q); + else + continue; if (!rc) { dmi_available = 1; dmi_iounmap(p, 0x10000); diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 8ae1f5b19669..682de754d63f 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -172,6 +172,7 @@ config GPIO_MSM_V2 config GPIO_MVEBU def_bool y depends on PLAT_ORION + depends on OF select GPIO_GENERIC select GENERIC_IRQ_CHIP diff --git a/drivers/gpio/gpio-ich.c b/drivers/gpio/gpio-ich.c index 6cc87ac8e019..6f2306db8591 100644 --- a/drivers/gpio/gpio-ich.c +++ b/drivers/gpio/gpio-ich.c @@ -390,6 +390,7 @@ static int ichx_gpio_probe(struct platform_device *pdev) return -ENODEV; } + spin_lock_init(&ichx_priv.lock); res_base = platform_get_resource(pdev, IORESOURCE_IO, ICH_RES_GPIO); ichx_priv.use_gpio = ich_info->use_gpio; err = ichx_gpio_request_regions(res_base, pdev->name, diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c index d767b534c4af..7d9bd94be8d2 100644 --- a/drivers/gpio/gpio-mvebu.c +++ b/drivers/gpio/gpio-mvebu.c @@ -41,7 +41,6 @@ #include <linux/io.h> #include <linux/of_irq.h> #include <linux/of_device.h> -#include <linux/platform_device.h> #include <linux/pinctrl/consumer.h> /* @@ -469,19 +468,6 @@ static void mvebu_gpio_irq_handler(unsigned int irq, struct irq_desc *desc) } } -static struct platform_device_id mvebu_gpio_ids[] = { - { - .name = "orion-gpio", - }, { - .name = "mv78200-gpio", - }, { - .name = "armadaxp-gpio", - }, { - /* sentinel */ - }, -}; -MODULE_DEVICE_TABLE(platform, mvebu_gpio_ids); - static struct of_device_id mvebu_gpio_of_match[] = { { .compatible = "marvell,orion-gpio", @@ -555,9 +541,7 @@ static int mvebu_gpio_probe(struct platform_device *pdev) mvchip->chip.base = id * MVEBU_MAX_GPIO_PER_BANK; mvchip->chip.ngpio = ngpios; mvchip->chip.can_sleep = 0; -#ifdef CONFIG_OF mvchip->chip.of_node = np; -#endif spin_lock_init(&mvchip->lock); mvchip->membase = devm_request_and_ioremap(&pdev->dev, res); @@ -698,7 +682,6 @@ 
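One subtlety in the dmi_scan.c hunk above: the legacy _DMI_ entry point packs its revision into the two nibbles of byte 14, so dmi_present() widens it into the same (major << 8 | minor) layout that smbios_present() builds from bytes 6 and 7. For DMI 2.4, buf[14] is 0x24:

        dmi_ver = (buf[14] & 0xF0) << 4 | (buf[14] & 0x0F);
        /* (0x20 << 4) | 0x04 == 0x0204  ->  "Legacy DMI 2.4 present." */

The fixup table in smbios_present() (0x021F and 0x0221 become 0x0203, 0x0233 becomes 0x0206) normalizes BIOSes that misreport in that same layout.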
static struct platform_driver mvebu_gpio_driver = { .of_match_table = mvebu_gpio_of_match, }, .probe = mvebu_gpio_probe, - .id_table = mvebu_gpio_ids, }; static int __init mvebu_gpio_init(void) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index a9151337d5b9..33d20be87db5 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -579,7 +579,7 @@ static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo, * at this point the buffer should be dead, so * no new sync objects can be attached. */ - sync_obj = driver->sync_obj_ref(&bo->sync_obj); + sync_obj = driver->sync_obj_ref(bo->sync_obj); spin_unlock(&bdev->fence_lock); atomic_set(&bo->reserved, 0); diff --git a/drivers/hwmon/hwmon-vid.c b/drivers/hwmon/hwmon-vid.c index 9f26400713f0..89cfd64b3373 100644 --- a/drivers/hwmon/hwmon-vid.c +++ b/drivers/hwmon/hwmon-vid.c @@ -115,6 +115,12 @@ int vid_from_reg(int val, u8 vrm) return (val < 32) ? 1550 - 25 * val : 775 - (25 * (val - 31)) / 2; + case 26: /* AMD family 10h to 15h, serial VID */ + val &= 0x7f; + if (val >= 0x7c) + return 0; + return DIV_ROUND_CLOSEST(15500 - 125 * val, 10); + case 91: /* VRM 9.1 */ case 90: /* VRM 9.0 */ val &= 0x1f; @@ -195,6 +201,10 @@ static struct vrm_model vrm_models[] = { {X86_VENDOR_AMD, 0xF, 0x40, 0x7F, ANY, 24}, /* NPT family 0Fh */ {X86_VENDOR_AMD, 0xF, 0x80, ANY, ANY, 25}, /* future fam. 0Fh */ {X86_VENDOR_AMD, 0x10, 0x0, ANY, ANY, 25}, /* NPT family 10h */ + {X86_VENDOR_AMD, 0x11, 0x0, ANY, ANY, 26}, /* family 11h */ + {X86_VENDOR_AMD, 0x12, 0x0, ANY, ANY, 26}, /* family 12h */ + {X86_VENDOR_AMD, 0x14, 0x0, ANY, ANY, 26}, /* family 14h */ + {X86_VENDOR_AMD, 0x15, 0x0, ANY, ANY, 26}, /* family 15h */ {X86_VENDOR_INTEL, 0x6, 0x0, 0x6, ANY, 82}, /* Pentium Pro, * Pentium II, Xeon, diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c index c3c471ca202f..646314f7c839 100644 --- a/drivers/hwmon/hwmon.c +++ b/drivers/hwmon/hwmon.c @@ -84,19 +84,21 @@ static void __init hwmon_pci_quirks(void) /* Open access to 0x295-0x296 on MSI MS-7031 */ sb = pci_get_device(PCI_VENDOR_ID_ATI, 0x436c, NULL); - if (sb && - (sb->subsystem_vendor == 0x1462 && /* MSI */ - sb->subsystem_device == 0x0031)) { /* MS-7031 */ - - pci_read_config_byte(sb, 0x48, &enable); - pci_read_config_word(sb, 0x64, &base); - - if (base == 0 && !(enable & BIT(2))) { - dev_info(&sb->dev, - "Opening wide generic port at 0x295\n"); - pci_write_config_word(sb, 0x64, 0x295); - pci_write_config_byte(sb, 0x48, enable | BIT(2)); + if (sb) { + if (sb->subsystem_vendor == 0x1462 && /* MSI */ + sb->subsystem_device == 0x0031) { /* MS-7031 */ + pci_read_config_byte(sb, 0x48, &enable); + pci_read_config_word(sb, 0x64, &base); + + if (base == 0 && !(enable & BIT(2))) { + dev_info(&sb->dev, + "Opening wide generic port at 0x295\n"); + pci_write_config_word(sb, 0x64, 0x295); + pci_write_config_byte(sb, 0x48, + enable | BIT(2)); + } } + pci_dev_put(sb); } #endif } diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c index d32aa354cbdf..117d66fcded6 100644 --- a/drivers/hwmon/it87.c +++ b/drivers/hwmon/it87.c @@ -203,6 +203,8 @@ static const u8 IT87_REG_FAN[] = { 0x0d, 0x0e, 0x0f, 0x80, 0x82 }; static const u8 IT87_REG_FAN_MIN[] = { 0x10, 0x11, 0x12, 0x84, 0x86 }; static const u8 IT87_REG_FANX[] = { 0x18, 0x19, 0x1a, 0x81, 0x83 }; static const u8 IT87_REG_FANX_MIN[] = { 0x1b, 0x1c, 0x1d, 0x85, 0x87 }; +static const u8 IT87_REG_TEMP_OFFSET[] = { 0x56, 0x57, 0x59 }; + #define IT87_REG_FAN_MAIN_CTRL 0x13 #define IT87_REG_FAN_CTL 0x14 #define 
IT87_REG_PWM(nr) (0x15 + (nr)) @@ -226,6 +228,83 @@ static const u8 IT87_REG_FANX_MIN[] = { 0x1b, 0x1c, 0x1d, 0x85, 0x87 }; #define IT87_REG_AUTO_TEMP(nr, i) (0x60 + (nr) * 8 + (i)) #define IT87_REG_AUTO_PWM(nr, i) (0x65 + (nr) * 8 + (i)) +struct it87_devices { + const char *name; + u16 features; + u8 peci_mask; + u8 old_peci_mask; +}; + +#define FEAT_12MV_ADC (1 << 0) +#define FEAT_NEWER_AUTOPWM (1 << 1) +#define FEAT_OLD_AUTOPWM (1 << 2) +#define FEAT_16BIT_FANS (1 << 3) +#define FEAT_TEMP_OFFSET (1 << 4) +#define FEAT_TEMP_PECI (1 << 5) +#define FEAT_TEMP_OLD_PECI (1 << 6) + +static const struct it87_devices it87_devices[] = { + [it87] = { + .name = "it87", + .features = FEAT_OLD_AUTOPWM, /* may need to overwrite */ + }, + [it8712] = { + .name = "it8712", + .features = FEAT_OLD_AUTOPWM, /* may need to overwrite */ + }, + [it8716] = { + .name = "it8716", + .features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET, + }, + [it8718] = { + .name = "it8718", + .features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET + | FEAT_TEMP_OLD_PECI, + .old_peci_mask = 0x4, + }, + [it8720] = { + .name = "it8720", + .features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET + | FEAT_TEMP_OLD_PECI, + .old_peci_mask = 0x4, + }, + [it8721] = { + .name = "it8721", + .features = FEAT_NEWER_AUTOPWM | FEAT_12MV_ADC | FEAT_16BIT_FANS + | FEAT_TEMP_OFFSET | FEAT_TEMP_OLD_PECI | FEAT_TEMP_PECI, + .peci_mask = 0x05, + .old_peci_mask = 0x02, /* Actually reports PCH */ + }, + [it8728] = { + .name = "it8728", + .features = FEAT_NEWER_AUTOPWM | FEAT_12MV_ADC | FEAT_16BIT_FANS + | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI, + .peci_mask = 0x07, + }, + [it8782] = { + .name = "it8782", + .features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET + | FEAT_TEMP_OLD_PECI, + .old_peci_mask = 0x4, + }, + [it8783] = { + .name = "it8783", + .features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET + | FEAT_TEMP_OLD_PECI, + .old_peci_mask = 0x4, + }, +}; + +#define has_16bit_fans(data) ((data)->features & FEAT_16BIT_FANS) +#define has_12mv_adc(data) ((data)->features & FEAT_12MV_ADC) +#define has_newer_autopwm(data) ((data)->features & FEAT_NEWER_AUTOPWM) +#define has_old_autopwm(data) ((data)->features & FEAT_OLD_AUTOPWM) +#define has_temp_offset(data) ((data)->features & FEAT_TEMP_OFFSET) +#define has_temp_peci(data, nr) (((data)->features & FEAT_TEMP_PECI) && \ + ((data)->peci_mask & (1 << nr))) +#define has_temp_old_peci(data, nr) \ + (((data)->features & FEAT_TEMP_OLD_PECI) && \ + ((data)->old_peci_mask & (1 << nr))) struct it87_sio_data { enum chips type; @@ -249,7 +328,9 @@ struct it87_sio_data { struct it87_data { struct device *hwmon_dev; enum chips type; - u8 revision; + u16 features; + u8 peci_mask; + u8 old_peci_mask; unsigned short addr; const char *name; @@ -258,17 +339,13 @@ struct it87_data { unsigned long last_updated; /* In jiffies */ u16 in_scaled; /* Internal voltage sensors are scaled */ - u8 in[9]; /* Register value */ - u8 in_max[8]; /* Register value */ - u8 in_min[8]; /* Register value */ + u8 in[9][3]; /* [nr][0]=in, [1]=min, [2]=max */ u8 has_fan; /* Bitfield, fans enabled */ - u16 fan[5]; /* Register values, possibly combined */ - u16 fan_min[5]; /* Register values, possibly combined */ + u16 fan[5][2]; /* Register values, [nr][0]=fan, [1]=min */ u8 has_temp; /* Bitfield, temp sensors enabled */ - s8 temp[3]; /* Register value */ - s8 temp_high[3]; /* Register value */ - s8 temp_low[3]; /* Register value */ - u8 sensor; /* Register value */ + s8 temp[3][4]; /* [nr][0]=temp, [1]=min, [2]=max, [3]=offset */ + u8 sensor; /* Register value (IT87_REG_TEMP_ENABLE) */ + u8 
extra; /* Register value (IT87_REG_TEMP_EXTRA) */ u8 fan_div[3]; /* Register encoding, shifted right */ u8 vid; /* Register encoding, combined */ u8 vrm; @@ -296,26 +373,6 @@ struct it87_data { s8 auto_temp[3][5]; /* [nr][0] is point1_temp_hyst */ }; -static inline int has_12mv_adc(const struct it87_data *data) -{ - /* - * IT8721F and later have a 12 mV ADC, also with internal scaling - * on selected inputs. - */ - return data->type == it8721 - || data->type == it8728; -} - -static inline int has_newer_autopwm(const struct it87_data *data) -{ - /* - * IT8721F and later have separate registers for the temperature - * mapping and the manual duty cycle. - */ - return data->type == it8721 - || data->type == it8728; -} - static int adc_lsb(const struct it87_data *data, int nr) { int lsb = has_12mv_adc(data) ? 12 : 16; @@ -398,35 +455,6 @@ static const unsigned int pwm_freq[8] = { 750000 / 128, }; -static inline int has_16bit_fans(const struct it87_data *data) -{ - /* - * IT8705F Datasheet 0.4.1, 3h == Version G. - * IT8712F Datasheet 0.9.1, section 8.3.5 indicates 8h == Version J. - * These are the first revisions with 16-bit tachometer support. - */ - return (data->type == it87 && data->revision >= 0x03) - || (data->type == it8712 && data->revision >= 0x08) - || data->type == it8716 - || data->type == it8718 - || data->type == it8720 - || data->type == it8721 - || data->type == it8728 - || data->type == it8782 - || data->type == it8783; -} - -static inline int has_old_autopwm(const struct it87_data *data) -{ - /* - * The old automatic fan speed control interface is implemented - * by IT8705F chips up to revision F and IT8712F chips up to - * revision G. - */ - return (data->type == it87 && data->revision < 0x03) - || (data->type == it8712 && data->revision < 0x08); -} - static int it87_probe(struct platform_device *pdev); static int it87_remove(struct platform_device *pdev); @@ -447,59 +475,22 @@ static struct platform_driver it87_driver = { }; static ssize_t show_in(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); - int nr = sensor_attr->index; - - struct it87_data *data = it87_update_device(dev); - return sprintf(buf, "%d\n", in_from_reg(data, nr, data->in[nr])); -} - -static ssize_t show_in_min(struct device *dev, struct device_attribute *attr, - char *buf) + char *buf) { - struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); - int nr = sensor_attr->index; + struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr); + int nr = sattr->nr; + int index = sattr->index; struct it87_data *data = it87_update_device(dev); - return sprintf(buf, "%d\n", in_from_reg(data, nr, data->in_min[nr])); + return sprintf(buf, "%d\n", in_from_reg(data, nr, data->in[nr][index])); } -static ssize_t show_in_max(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); - int nr = sensor_attr->index; - - struct it87_data *data = it87_update_device(dev); - return sprintf(buf, "%d\n", in_from_reg(data, nr, data->in_max[nr])); -} - -static ssize_t set_in_min(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); - int nr = sensor_attr->index; - - struct it87_data *data = dev_get_drvdata(dev); - unsigned long val; - - if (kstrtoul(buf, 10, &val) < 0) - return -EINVAL; - - mutex_lock(&data->update_lock); - 
data->in_min[nr] = in_to_reg(data, nr, val); - it87_write_value(data, IT87_REG_VIN_MIN(nr), - data->in_min[nr]); - mutex_unlock(&data->update_lock); - return count; -} -static ssize_t set_in_max(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t set_in(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { - struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); - int nr = sensor_attr->index; + struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr); + int nr = sattr->nr; + int index = sattr->index; struct it87_data *data = dev_get_drvdata(dev); unsigned long val; @@ -508,140 +499,167 @@ static ssize_t set_in_max(struct device *dev, struct device_attribute *attr, return -EINVAL; mutex_lock(&data->update_lock); - data->in_max[nr] = in_to_reg(data, nr, val); - it87_write_value(data, IT87_REG_VIN_MAX(nr), - data->in_max[nr]); + data->in[nr][index] = in_to_reg(data, nr, val); + it87_write_value(data, + index == 1 ? IT87_REG_VIN_MIN(nr) + : IT87_REG_VIN_MAX(nr), + data->in[nr][index]); mutex_unlock(&data->update_lock); return count; } -#define show_in_offset(offset) \ -static SENSOR_DEVICE_ATTR(in##offset##_input, S_IRUGO, \ - show_in, NULL, offset); - -#define limit_in_offset(offset) \ -static SENSOR_DEVICE_ATTR(in##offset##_min, S_IRUGO | S_IWUSR, \ - show_in_min, set_in_min, offset); \ -static SENSOR_DEVICE_ATTR(in##offset##_max, S_IRUGO | S_IWUSR, \ - show_in_max, set_in_max, offset); - -show_in_offset(0); -limit_in_offset(0); -show_in_offset(1); -limit_in_offset(1); -show_in_offset(2); -limit_in_offset(2); -show_in_offset(3); -limit_in_offset(3); -show_in_offset(4); -limit_in_offset(4); -show_in_offset(5); -limit_in_offset(5); -show_in_offset(6); -limit_in_offset(6); -show_in_offset(7); -limit_in_offset(7); -show_in_offset(8); +static SENSOR_DEVICE_ATTR_2(in0_input, S_IRUGO, show_in, NULL, 0, 0); +static SENSOR_DEVICE_ATTR_2(in0_min, S_IRUGO | S_IWUSR, show_in, set_in, + 0, 1); +static SENSOR_DEVICE_ATTR_2(in0_max, S_IRUGO | S_IWUSR, show_in, set_in, + 0, 2); + +static SENSOR_DEVICE_ATTR_2(in1_input, S_IRUGO, show_in, NULL, 1, 0); +static SENSOR_DEVICE_ATTR_2(in1_min, S_IRUGO | S_IWUSR, show_in, set_in, + 1, 1); +static SENSOR_DEVICE_ATTR_2(in1_max, S_IRUGO | S_IWUSR, show_in, set_in, + 1, 2); + +static SENSOR_DEVICE_ATTR_2(in2_input, S_IRUGO, show_in, NULL, 2, 0); +static SENSOR_DEVICE_ATTR_2(in2_min, S_IRUGO | S_IWUSR, show_in, set_in, + 2, 1); +static SENSOR_DEVICE_ATTR_2(in2_max, S_IRUGO | S_IWUSR, show_in, set_in, + 2, 2); + +static SENSOR_DEVICE_ATTR_2(in3_input, S_IRUGO, show_in, NULL, 3, 0); +static SENSOR_DEVICE_ATTR_2(in3_min, S_IRUGO | S_IWUSR, show_in, set_in, + 3, 1); +static SENSOR_DEVICE_ATTR_2(in3_max, S_IRUGO | S_IWUSR, show_in, set_in, + 3, 2); + +static SENSOR_DEVICE_ATTR_2(in4_input, S_IRUGO, show_in, NULL, 4, 0); +static SENSOR_DEVICE_ATTR_2(in4_min, S_IRUGO | S_IWUSR, show_in, set_in, + 4, 1); +static SENSOR_DEVICE_ATTR_2(in4_max, S_IRUGO | S_IWUSR, show_in, set_in, + 4, 2); + +static SENSOR_DEVICE_ATTR_2(in5_input, S_IRUGO, show_in, NULL, 5, 0); +static SENSOR_DEVICE_ATTR_2(in5_min, S_IRUGO | S_IWUSR, show_in, set_in, + 5, 1); +static SENSOR_DEVICE_ATTR_2(in5_max, S_IRUGO | S_IWUSR, show_in, set_in, + 5, 2); + +static SENSOR_DEVICE_ATTR_2(in6_input, S_IRUGO, show_in, NULL, 6, 0); +static SENSOR_DEVICE_ATTR_2(in6_min, S_IRUGO | S_IWUSR, show_in, set_in, + 6, 1); +static SENSOR_DEVICE_ATTR_2(in6_max, S_IRUGO | S_IWUSR, show_in, set_in, + 6, 2); + +static 
SENSOR_DEVICE_ATTR_2(in7_input, S_IRUGO, show_in, NULL, 7, 0); +static SENSOR_DEVICE_ATTR_2(in7_min, S_IRUGO | S_IWUSR, show_in, set_in, + 7, 1); +static SENSOR_DEVICE_ATTR_2(in7_max, S_IRUGO | S_IWUSR, show_in, set_in, + 7, 2); + +static SENSOR_DEVICE_ATTR_2(in8_input, S_IRUGO, show_in, NULL, 8, 0); /* 3 temperatures */ static ssize_t show_temp(struct device *dev, struct device_attribute *attr, - char *buf) + char *buf) { - struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); - int nr = sensor_attr->index; - + struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr); + int nr = sattr->nr; + int index = sattr->index; struct it87_data *data = it87_update_device(dev); - return sprintf(buf, "%d\n", TEMP_FROM_REG(data->temp[nr])); -} -static ssize_t show_temp_max(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); - int nr = sensor_attr->index; - struct it87_data *data = it87_update_device(dev); - return sprintf(buf, "%d\n", TEMP_FROM_REG(data->temp_high[nr])); + return sprintf(buf, "%d\n", TEMP_FROM_REG(data->temp[nr][index])); } -static ssize_t show_temp_min(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); - int nr = sensor_attr->index; - struct it87_data *data = it87_update_device(dev); - return sprintf(buf, "%d\n", TEMP_FROM_REG(data->temp_low[nr])); -} -static ssize_t set_temp_max(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t set_temp(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { - struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); - int nr = sensor_attr->index; - + struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr); + int nr = sattr->nr; + int index = sattr->index; struct it87_data *data = dev_get_drvdata(dev); long val; + u8 reg, regval; if (kstrtol(buf, 10, &val) < 0) return -EINVAL; mutex_lock(&data->update_lock); - data->temp_high[nr] = TEMP_TO_REG(val); - it87_write_value(data, IT87_REG_TEMP_HIGH(nr), data->temp_high[nr]); - mutex_unlock(&data->update_lock); - return count; -} -static ssize_t set_temp_min(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); - int nr = sensor_attr->index; - struct it87_data *data = dev_get_drvdata(dev); - long val; - - if (kstrtol(buf, 10, &val) < 0) - return -EINVAL; + switch (index) { + default: + case 1: + reg = IT87_REG_TEMP_LOW(nr); + break; + case 2: + reg = IT87_REG_TEMP_HIGH(nr); + break; + case 3: + regval = it87_read_value(data, IT87_REG_BEEP_ENABLE); + if (!(regval & 0x80)) { + regval |= 0x80; + it87_write_value(data, IT87_REG_BEEP_ENABLE, regval); + } + data->valid = 0; + reg = IT87_REG_TEMP_OFFSET[nr]; + break; + } - mutex_lock(&data->update_lock); - data->temp_low[nr] = TEMP_TO_REG(val); - it87_write_value(data, IT87_REG_TEMP_LOW(nr), data->temp_low[nr]); + data->temp[nr][index] = TEMP_TO_REG(val); + it87_write_value(data, reg, data->temp[nr][index]); mutex_unlock(&data->update_lock); return count; } -#define show_temp_offset(offset) \ -static SENSOR_DEVICE_ATTR(temp##offset##_input, S_IRUGO, \ - show_temp, NULL, offset - 1); \ -static SENSOR_DEVICE_ATTR(temp##offset##_max, S_IRUGO | S_IWUSR, \ - show_temp_max, set_temp_max, offset - 1); \ -static SENSOR_DEVICE_ATTR(temp##offset##_min, 
S_IRUGO | S_IWUSR, \ - show_temp_min, set_temp_min, offset - 1); - -show_temp_offset(1); -show_temp_offset(2); -show_temp_offset(3); - -static ssize_t show_sensor(struct device *dev, struct device_attribute *attr, - char *buf) + +static SENSOR_DEVICE_ATTR_2(temp1_input, S_IRUGO, show_temp, NULL, 0, 0); +static SENSOR_DEVICE_ATTR_2(temp1_min, S_IRUGO | S_IWUSR, show_temp, set_temp, + 0, 1); +static SENSOR_DEVICE_ATTR_2(temp1_max, S_IRUGO | S_IWUSR, show_temp, set_temp, + 0, 2); +static SENSOR_DEVICE_ATTR_2(temp1_offset, S_IRUGO | S_IWUSR, show_temp, + set_temp, 0, 3); +static SENSOR_DEVICE_ATTR_2(temp2_input, S_IRUGO, show_temp, NULL, 1, 0); +static SENSOR_DEVICE_ATTR_2(temp2_min, S_IRUGO | S_IWUSR, show_temp, set_temp, + 1, 1); +static SENSOR_DEVICE_ATTR_2(temp2_max, S_IRUGO | S_IWUSR, show_temp, set_temp, + 1, 2); +static SENSOR_DEVICE_ATTR_2(temp2_offset, S_IRUGO | S_IWUSR, show_temp, + set_temp, 1, 3); +static SENSOR_DEVICE_ATTR_2(temp3_input, S_IRUGO, show_temp, NULL, 2, 0); +static SENSOR_DEVICE_ATTR_2(temp3_min, S_IRUGO | S_IWUSR, show_temp, set_temp, + 2, 1); +static SENSOR_DEVICE_ATTR_2(temp3_max, S_IRUGO | S_IWUSR, show_temp, set_temp, + 2, 2); +static SENSOR_DEVICE_ATTR_2(temp3_offset, S_IRUGO | S_IWUSR, show_temp, + set_temp, 2, 3); + +static ssize_t show_temp_type(struct device *dev, struct device_attribute *attr, + char *buf) { struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); int nr = sensor_attr->index; struct it87_data *data = it87_update_device(dev); u8 reg = data->sensor; /* In case value is updated while used */ + u8 extra = data->extra; + if ((has_temp_peci(data, nr) && (reg >> 6 == nr + 1)) + || (has_temp_old_peci(data, nr) && (extra & 0x80))) + return sprintf(buf, "6\n"); /* Intel PECI */ if (reg & (1 << nr)) return sprintf(buf, "3\n"); /* thermal diode */ if (reg & (8 << nr)) return sprintf(buf, "4\n"); /* thermistor */ return sprintf(buf, "0\n"); /* disabled */ } -static ssize_t set_sensor(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) + +static ssize_t set_temp_type(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); int nr = sensor_attr->index; struct it87_data *data = dev_get_drvdata(dev); long val; - u8 reg; + u8 reg, extra; if (kstrtol(buf, 10, &val) < 0) return -EINVAL; @@ -649,33 +667,45 @@ static ssize_t set_sensor(struct device *dev, struct device_attribute *attr, reg = it87_read_value(data, IT87_REG_TEMP_ENABLE); reg &= ~(1 << nr); reg &= ~(8 << nr); + if (has_temp_peci(data, nr) && (reg >> 6 == nr + 1 || val == 6)) + reg &= 0x3f; + extra = it87_read_value(data, IT87_REG_TEMP_EXTRA); + if (has_temp_old_peci(data, nr) && ((extra & 0x80) || val == 6)) + extra &= 0x7f; if (val == 2) { /* backwards compatibility */ - dev_warn(dev, "Sensor type 2 is deprecated, please use 4 " - "instead\n"); + dev_warn(dev, + "Sensor type 2 is deprecated, please use 4 instead\n"); val = 4; } - /* 3 = thermal diode; 4 = thermistor; 0 = disabled */ + /* 3 = thermal diode; 4 = thermistor; 6 = Intel PECI; 0 = disabled */ if (val == 3) reg |= 1 << nr; else if (val == 4) reg |= 8 << nr; + else if (has_temp_peci(data, nr) && val == 6) + reg |= (nr + 1) << 6; + else if (has_temp_old_peci(data, nr) && val == 6) + extra |= 0x80; else if (val != 0) return -EINVAL; mutex_lock(&data->update_lock); data->sensor = reg; + data->extra = extra; it87_write_value(data, IT87_REG_TEMP_ENABLE, data->sensor); + if (has_temp_old_peci(data, 
nr)) + it87_write_value(data, IT87_REG_TEMP_EXTRA, data->extra); data->valid = 0; /* Force cache refresh */ mutex_unlock(&data->update_lock); return count; } -#define show_sensor_offset(offset) \ -static SENSOR_DEVICE_ATTR(temp##offset##_type, S_IRUGO | S_IWUSR, \ - show_sensor, set_sensor, offset - 1); -show_sensor_offset(1); -show_sensor_offset(2); -show_sensor_offset(3); +static SENSOR_DEVICE_ATTR(temp1_type, S_IRUGO | S_IWUSR, show_temp_type, + set_temp_type, 0); +static SENSOR_DEVICE_ATTR(temp2_type, S_IRUGO | S_IWUSR, show_temp_type, + set_temp_type, 1); +static SENSOR_DEVICE_ATTR(temp3_type, S_IRUGO | S_IWUSR, show_temp_type, + set_temp_type, 2); /* 3 Fans */ @@ -692,25 +722,21 @@ static int pwm_mode(const struct it87_data *data, int nr) } static ssize_t show_fan(struct device *dev, struct device_attribute *attr, - char *buf) + char *buf) { - struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); - int nr = sensor_attr->index; - + struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr); + int nr = sattr->nr; + int index = sattr->index; + int speed; struct it87_data *data = it87_update_device(dev); - return sprintf(buf, "%d\n", FAN_FROM_REG(data->fan[nr], - DIV_FROM_REG(data->fan_div[nr]))); -} -static ssize_t show_fan_min(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); - int nr = sensor_attr->index; - struct it87_data *data = it87_update_device(dev); - return sprintf(buf, "%d\n", FAN_FROM_REG(data->fan_min[nr], - DIV_FROM_REG(data->fan_div[nr]))); + speed = has_16bit_fans(data) ? + FAN16_FROM_REG(data->fan[nr][index]) : + FAN_FROM_REG(data->fan[nr][index], + DIV_FROM_REG(data->fan_div[nr])); + return sprintf(buf, "%d\n", speed); } + static ssize_t show_fan_div(struct device *dev, struct device_attribute *attr, char *buf) { @@ -747,11 +773,13 @@ static ssize_t show_pwm_freq(struct device *dev, struct device_attribute *attr, return sprintf(buf, "%u\n", pwm_freq[index]); } -static ssize_t set_fan_min(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) + +static ssize_t set_fan(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { - struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); - int nr = sensor_attr->index; + struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr); + int nr = sattr->nr; + int index = sattr->index; struct it87_data *data = dev_get_drvdata(dev); long val; @@ -761,24 +789,36 @@ static ssize_t set_fan_min(struct device *dev, struct device_attribute *attr, return -EINVAL; mutex_lock(&data->update_lock); - reg = it87_read_value(data, IT87_REG_FAN_DIV); - switch (nr) { - case 0: - data->fan_div[nr] = reg & 0x07; - break; - case 1: - data->fan_div[nr] = (reg >> 3) & 0x07; - break; - case 2: - data->fan_div[nr] = (reg & 0x40) ? 3 : 1; - break; + + if (has_16bit_fans(data)) { + data->fan[nr][index] = FAN16_TO_REG(val); + it87_write_value(data, IT87_REG_FAN_MIN[nr], + data->fan[nr][index] & 0xff); + it87_write_value(data, IT87_REG_FANX_MIN[nr], + data->fan[nr][index] >> 8); + } else { + reg = it87_read_value(data, IT87_REG_FAN_DIV); + switch (nr) { + case 0: + data->fan_div[nr] = reg & 0x07; + break; + case 1: + data->fan_div[nr] = (reg >> 3) & 0x07; + break; + case 2: + data->fan_div[nr] = (reg & 0x40) ? 
3 : 1; + break; + } + data->fan[nr][index] = + FAN_TO_REG(val, DIV_FROM_REG(data->fan_div[nr])); + it87_write_value(data, IT87_REG_FAN_MIN[nr], + data->fan[nr][index]); } - data->fan_min[nr] = FAN_TO_REG(val, DIV_FROM_REG(data->fan_div[nr])); - it87_write_value(data, IT87_REG_FAN_MIN[nr], data->fan_min[nr]); mutex_unlock(&data->update_lock); return count; } + static ssize_t set_fan_div(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { @@ -797,7 +837,7 @@ static ssize_t set_fan_div(struct device *dev, struct device_attribute *attr, old = it87_read_value(data, IT87_REG_FAN_DIV); /* Save fan min limit */ - min = FAN_FROM_REG(data->fan_min[nr], DIV_FROM_REG(data->fan_div[nr])); + min = FAN_FROM_REG(data->fan[nr][1], DIV_FROM_REG(data->fan_div[nr])); switch (nr) { case 0: @@ -818,8 +858,8 @@ static ssize_t set_fan_div(struct device *dev, struct device_attribute *attr, it87_write_value(data, IT87_REG_FAN_DIV, val); /* Restore fan min limit */ - data->fan_min[nr] = FAN_TO_REG(min, DIV_FROM_REG(data->fan_div[nr])); - it87_write_value(data, IT87_REG_FAN_MIN[nr], data->fan_min[nr]); + data->fan[nr][1] = FAN_TO_REG(min, DIV_FROM_REG(data->fan_div[nr])); + it87_write_value(data, IT87_REG_FAN_MIN[nr], data->fan[nr][1]); mutex_unlock(&data->update_lock); return count; @@ -843,8 +883,8 @@ static int check_trip_points(struct device *dev, int nr) } if (err) { - dev_err(dev, "Inconsistent trip points, not switching to " - "automatic mode\n"); + dev_err(dev, + "Inconsistent trip points, not switching to automatic mode\n"); dev_err(dev, "Adjust the trip points and try again\n"); } return err; @@ -1092,118 +1132,106 @@ static ssize_t set_auto_temp(struct device *dev, return count; } -#define show_fan_offset(offset) \ -static SENSOR_DEVICE_ATTR(fan##offset##_input, S_IRUGO, \ - show_fan, NULL, offset - 1); \ -static SENSOR_DEVICE_ATTR(fan##offset##_min, S_IRUGO | S_IWUSR, \ - show_fan_min, set_fan_min, offset - 1); \ -static SENSOR_DEVICE_ATTR(fan##offset##_div, S_IRUGO | S_IWUSR, \ - show_fan_div, set_fan_div, offset - 1); - -show_fan_offset(1); -show_fan_offset(2); -show_fan_offset(3); - -#define show_pwm_offset(offset) \ -static SENSOR_DEVICE_ATTR(pwm##offset##_enable, S_IRUGO | S_IWUSR, \ - show_pwm_enable, set_pwm_enable, offset - 1); \ -static SENSOR_DEVICE_ATTR(pwm##offset, S_IRUGO | S_IWUSR, \ - show_pwm, set_pwm, offset - 1); \ -static DEVICE_ATTR(pwm##offset##_freq, \ - (offset == 1 ? S_IRUGO | S_IWUSR : S_IRUGO), \ - show_pwm_freq, (offset == 1 ? 
set_pwm_freq : NULL)); \ -static SENSOR_DEVICE_ATTR(pwm##offset##_auto_channels_temp, \ - S_IRUGO | S_IWUSR, show_pwm_temp_map, set_pwm_temp_map, \ - offset - 1); \ -static SENSOR_DEVICE_ATTR_2(pwm##offset##_auto_point1_pwm, \ - S_IRUGO | S_IWUSR, show_auto_pwm, set_auto_pwm, \ - offset - 1, 0); \ -static SENSOR_DEVICE_ATTR_2(pwm##offset##_auto_point2_pwm, \ - S_IRUGO | S_IWUSR, show_auto_pwm, set_auto_pwm, \ - offset - 1, 1); \ -static SENSOR_DEVICE_ATTR_2(pwm##offset##_auto_point3_pwm, \ - S_IRUGO | S_IWUSR, show_auto_pwm, set_auto_pwm, \ - offset - 1, 2); \ -static SENSOR_DEVICE_ATTR_2(pwm##offset##_auto_point4_pwm, \ - S_IRUGO, show_auto_pwm, NULL, offset - 1, 3); \ -static SENSOR_DEVICE_ATTR_2(pwm##offset##_auto_point1_temp, \ - S_IRUGO | S_IWUSR, show_auto_temp, set_auto_temp, \ - offset - 1, 1); \ -static SENSOR_DEVICE_ATTR_2(pwm##offset##_auto_point1_temp_hyst, \ - S_IRUGO | S_IWUSR, show_auto_temp, set_auto_temp, \ - offset - 1, 0); \ -static SENSOR_DEVICE_ATTR_2(pwm##offset##_auto_point2_temp, \ - S_IRUGO | S_IWUSR, show_auto_temp, set_auto_temp, \ - offset - 1, 2); \ -static SENSOR_DEVICE_ATTR_2(pwm##offset##_auto_point3_temp, \ - S_IRUGO | S_IWUSR, show_auto_temp, set_auto_temp, \ - offset - 1, 3); \ -static SENSOR_DEVICE_ATTR_2(pwm##offset##_auto_point4_temp, \ - S_IRUGO | S_IWUSR, show_auto_temp, set_auto_temp, \ - offset - 1, 4); - -show_pwm_offset(1); -show_pwm_offset(2); -show_pwm_offset(3); - -/* A different set of callbacks for 16-bit fans */ -static ssize_t show_fan16(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); - int nr = sensor_attr->index; - struct it87_data *data = it87_update_device(dev); - return sprintf(buf, "%d\n", FAN16_FROM_REG(data->fan[nr])); -} - -static ssize_t show_fan16_min(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); - int nr = sensor_attr->index; - struct it87_data *data = it87_update_device(dev); - return sprintf(buf, "%d\n", FAN16_FROM_REG(data->fan_min[nr])); -} - -static ssize_t set_fan16_min(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); - int nr = sensor_attr->index; - struct it87_data *data = dev_get_drvdata(dev); - long val; - - if (kstrtol(buf, 10, &val) < 0) - return -EINVAL; - - mutex_lock(&data->update_lock); - data->fan_min[nr] = FAN16_TO_REG(val); - it87_write_value(data, IT87_REG_FAN_MIN[nr], - data->fan_min[nr] & 0xff); - it87_write_value(data, IT87_REG_FANX_MIN[nr], - data->fan_min[nr] >> 8); - mutex_unlock(&data->update_lock); - return count; -} - -/* - * We want to use the same sysfs file names as 8-bit fans, but we need - * different variable names, so we have to use SENSOR_ATTR instead of - * SENSOR_DEVICE_ATTR. 
- */ -#define show_fan16_offset(offset) \ -static struct sensor_device_attribute sensor_dev_attr_fan##offset##_input16 \ - = SENSOR_ATTR(fan##offset##_input, S_IRUGO, \ - show_fan16, NULL, offset - 1); \ -static struct sensor_device_attribute sensor_dev_attr_fan##offset##_min16 \ - = SENSOR_ATTR(fan##offset##_min, S_IRUGO | S_IWUSR, \ - show_fan16_min, set_fan16_min, offset - 1) - -show_fan16_offset(1); -show_fan16_offset(2); -show_fan16_offset(3); -show_fan16_offset(4); -show_fan16_offset(5); +static SENSOR_DEVICE_ATTR_2(fan1_input, S_IRUGO, show_fan, NULL, 0, 0); +static SENSOR_DEVICE_ATTR_2(fan1_min, S_IRUGO | S_IWUSR, show_fan, set_fan, + 0, 1); +static SENSOR_DEVICE_ATTR(fan1_div, S_IRUGO | S_IWUSR, show_fan_div, + set_fan_div, 0); + +static SENSOR_DEVICE_ATTR_2(fan2_input, S_IRUGO, show_fan, NULL, 1, 0); +static SENSOR_DEVICE_ATTR_2(fan2_min, S_IRUGO | S_IWUSR, show_fan, set_fan, + 1, 1); +static SENSOR_DEVICE_ATTR(fan2_div, S_IRUGO | S_IWUSR, show_fan_div, + set_fan_div, 1); + +static SENSOR_DEVICE_ATTR_2(fan3_input, S_IRUGO, show_fan, NULL, 2, 0); +static SENSOR_DEVICE_ATTR_2(fan3_min, S_IRUGO | S_IWUSR, show_fan, set_fan, + 2, 1); +static SENSOR_DEVICE_ATTR(fan3_div, S_IRUGO | S_IWUSR, show_fan_div, + set_fan_div, 2); + +static SENSOR_DEVICE_ATTR_2(fan4_input, S_IRUGO, show_fan, NULL, 3, 0); +static SENSOR_DEVICE_ATTR_2(fan4_min, S_IRUGO | S_IWUSR, show_fan, set_fan, + 3, 1); + +static SENSOR_DEVICE_ATTR_2(fan5_input, S_IRUGO, show_fan, NULL, 4, 0); +static SENSOR_DEVICE_ATTR_2(fan5_min, S_IRUGO | S_IWUSR, show_fan, set_fan, + 4, 1); + +static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR, + show_pwm_enable, set_pwm_enable, 0); +static SENSOR_DEVICE_ATTR(pwm1, S_IRUGO | S_IWUSR, show_pwm, set_pwm, 0); +static DEVICE_ATTR(pwm1_freq, S_IRUGO | S_IWUSR, show_pwm_freq, set_pwm_freq); +static SENSOR_DEVICE_ATTR(pwm1_auto_channels_temp, S_IRUGO | S_IWUSR, + show_pwm_temp_map, set_pwm_temp_map, 0); +static SENSOR_DEVICE_ATTR_2(pwm1_auto_point1_pwm, S_IRUGO | S_IWUSR, + show_auto_pwm, set_auto_pwm, 0, 0); +static SENSOR_DEVICE_ATTR_2(pwm1_auto_point2_pwm, S_IRUGO | S_IWUSR, + show_auto_pwm, set_auto_pwm, 0, 1); +static SENSOR_DEVICE_ATTR_2(pwm1_auto_point3_pwm, S_IRUGO | S_IWUSR, + show_auto_pwm, set_auto_pwm, 0, 2); +static SENSOR_DEVICE_ATTR_2(pwm1_auto_point4_pwm, S_IRUGO, + show_auto_pwm, NULL, 0, 3); +static SENSOR_DEVICE_ATTR_2(pwm1_auto_point1_temp, S_IRUGO | S_IWUSR, + show_auto_temp, set_auto_temp, 0, 1); +static SENSOR_DEVICE_ATTR_2(pwm1_auto_point1_temp_hyst, S_IRUGO | S_IWUSR, + show_auto_temp, set_auto_temp, 0, 0); +static SENSOR_DEVICE_ATTR_2(pwm1_auto_point2_temp, S_IRUGO | S_IWUSR, + show_auto_temp, set_auto_temp, 0, 2); +static SENSOR_DEVICE_ATTR_2(pwm1_auto_point3_temp, S_IRUGO | S_IWUSR, + show_auto_temp, set_auto_temp, 0, 3); +static SENSOR_DEVICE_ATTR_2(pwm1_auto_point4_temp, S_IRUGO | S_IWUSR, + show_auto_temp, set_auto_temp, 0, 4); + +static SENSOR_DEVICE_ATTR(pwm2_enable, S_IRUGO | S_IWUSR, + show_pwm_enable, set_pwm_enable, 1); +static SENSOR_DEVICE_ATTR(pwm2, S_IRUGO | S_IWUSR, show_pwm, set_pwm, 1); +static DEVICE_ATTR(pwm2_freq, S_IRUGO, show_pwm_freq, NULL); +static SENSOR_DEVICE_ATTR(pwm2_auto_channels_temp, S_IRUGO | S_IWUSR, + show_pwm_temp_map, set_pwm_temp_map, 1); +static SENSOR_DEVICE_ATTR_2(pwm2_auto_point1_pwm, S_IRUGO | S_IWUSR, + show_auto_pwm, set_auto_pwm, 1, 0); +static SENSOR_DEVICE_ATTR_2(pwm2_auto_point2_pwm, S_IRUGO | S_IWUSR, + show_auto_pwm, set_auto_pwm, 1, 1); +static SENSOR_DEVICE_ATTR_2(pwm2_auto_point3_pwm, S_IRUGO | S_IWUSR, + 
show_auto_pwm, set_auto_pwm, 1, 2); +static SENSOR_DEVICE_ATTR_2(pwm2_auto_point4_pwm, S_IRUGO, + show_auto_pwm, NULL, 1, 3); +static SENSOR_DEVICE_ATTR_2(pwm2_auto_point1_temp, S_IRUGO | S_IWUSR, + show_auto_temp, set_auto_temp, 1, 1); +static SENSOR_DEVICE_ATTR_2(pwm2_auto_point1_temp_hyst, S_IRUGO | S_IWUSR, + show_auto_temp, set_auto_temp, 1, 0); +static SENSOR_DEVICE_ATTR_2(pwm2_auto_point2_temp, S_IRUGO | S_IWUSR, + show_auto_temp, set_auto_temp, 1, 2); +static SENSOR_DEVICE_ATTR_2(pwm2_auto_point3_temp, S_IRUGO | S_IWUSR, + show_auto_temp, set_auto_temp, 1, 3); +static SENSOR_DEVICE_ATTR_2(pwm2_auto_point4_temp, S_IRUGO | S_IWUSR, + show_auto_temp, set_auto_temp, 1, 4); + +static SENSOR_DEVICE_ATTR(pwm3_enable, S_IRUGO | S_IWUSR, + show_pwm_enable, set_pwm_enable, 2); +static SENSOR_DEVICE_ATTR(pwm3, S_IRUGO | S_IWUSR, show_pwm, set_pwm, 2); +static DEVICE_ATTR(pwm3_freq, S_IRUGO, show_pwm_freq, NULL); +static SENSOR_DEVICE_ATTR(pwm3_auto_channels_temp, S_IRUGO | S_IWUSR, + show_pwm_temp_map, set_pwm_temp_map, 2); +static SENSOR_DEVICE_ATTR_2(pwm3_auto_point1_pwm, S_IRUGO | S_IWUSR, + show_auto_pwm, set_auto_pwm, 2, 0); +static SENSOR_DEVICE_ATTR_2(pwm3_auto_point2_pwm, S_IRUGO | S_IWUSR, + show_auto_pwm, set_auto_pwm, 2, 1); +static SENSOR_DEVICE_ATTR_2(pwm3_auto_point3_pwm, S_IRUGO | S_IWUSR, + show_auto_pwm, set_auto_pwm, 2, 2); +static SENSOR_DEVICE_ATTR_2(pwm3_auto_point4_pwm, S_IRUGO, + show_auto_pwm, NULL, 2, 3); +static SENSOR_DEVICE_ATTR_2(pwm3_auto_point1_temp, S_IRUGO | S_IWUSR, + show_auto_temp, set_auto_temp, 2, 1); +static SENSOR_DEVICE_ATTR_2(pwm3_auto_point1_temp_hyst, S_IRUGO | S_IWUSR, + show_auto_temp, set_auto_temp, 2, 0); +static SENSOR_DEVICE_ATTR_2(pwm3_auto_point2_temp, S_IRUGO | S_IWUSR, + show_auto_temp, set_auto_temp, 2, 2); +static SENSOR_DEVICE_ATTR_2(pwm3_auto_point3_temp, S_IRUGO | S_IWUSR, + show_auto_temp, set_auto_temp, 2, 3); +static SENSOR_DEVICE_ATTR_2(pwm3_auto_point4_temp, S_IRUGO | S_IWUSR, + show_auto_temp, set_auto_temp, 2, 4); /* Alarms */ static ssize_t show_alarms(struct device *dev, struct device_attribute *attr, @@ -1471,6 +1499,12 @@ static const struct attribute_group it87_group_temp[3] = { { .attrs = it87_attributes_temp[2] }, }; +static struct attribute *it87_attributes_temp_offset[] = { + &sensor_dev_attr_temp1_offset.dev_attr.attr, + &sensor_dev_attr_temp2_offset.dev_attr.attr, + &sensor_dev_attr_temp3_offset.dev_attr.attr, +}; + static struct attribute *it87_attributes[] = { &dev_attr_alarms.attr, &sensor_dev_attr_intrusion0_alarm.dev_attr.attr, @@ -1500,73 +1534,47 @@ static struct attribute *it87_attributes_temp_beep[] = { &sensor_dev_attr_temp3_beep.dev_attr.attr, }; -static struct attribute *it87_attributes_fan16[5][3+1] = { { - &sensor_dev_attr_fan1_input16.dev_attr.attr, - &sensor_dev_attr_fan1_min16.dev_attr.attr, +static struct attribute *it87_attributes_fan[5][3+1] = { { + &sensor_dev_attr_fan1_input.dev_attr.attr, + &sensor_dev_attr_fan1_min.dev_attr.attr, &sensor_dev_attr_fan1_alarm.dev_attr.attr, NULL }, { - &sensor_dev_attr_fan2_input16.dev_attr.attr, - &sensor_dev_attr_fan2_min16.dev_attr.attr, + &sensor_dev_attr_fan2_input.dev_attr.attr, + &sensor_dev_attr_fan2_min.dev_attr.attr, &sensor_dev_attr_fan2_alarm.dev_attr.attr, NULL }, { - &sensor_dev_attr_fan3_input16.dev_attr.attr, - &sensor_dev_attr_fan3_min16.dev_attr.attr, + &sensor_dev_attr_fan3_input.dev_attr.attr, + &sensor_dev_attr_fan3_min.dev_attr.attr, &sensor_dev_attr_fan3_alarm.dev_attr.attr, NULL }, { - &sensor_dev_attr_fan4_input16.dev_attr.attr, - 
&sensor_dev_attr_fan4_min16.dev_attr.attr, + &sensor_dev_attr_fan4_input.dev_attr.attr, + &sensor_dev_attr_fan4_min.dev_attr.attr, &sensor_dev_attr_fan4_alarm.dev_attr.attr, NULL }, { - &sensor_dev_attr_fan5_input16.dev_attr.attr, - &sensor_dev_attr_fan5_min16.dev_attr.attr, + &sensor_dev_attr_fan5_input.dev_attr.attr, + &sensor_dev_attr_fan5_min.dev_attr.attr, &sensor_dev_attr_fan5_alarm.dev_attr.attr, NULL } }; -static const struct attribute_group it87_group_fan16[5] = { - { .attrs = it87_attributes_fan16[0] }, - { .attrs = it87_attributes_fan16[1] }, - { .attrs = it87_attributes_fan16[2] }, - { .attrs = it87_attributes_fan16[3] }, - { .attrs = it87_attributes_fan16[4] }, +static const struct attribute_group it87_group_fan[5] = { + { .attrs = it87_attributes_fan[0] }, + { .attrs = it87_attributes_fan[1] }, + { .attrs = it87_attributes_fan[2] }, + { .attrs = it87_attributes_fan[3] }, + { .attrs = it87_attributes_fan[4] }, }; -static struct attribute *it87_attributes_fan[3][4+1] = { { - &sensor_dev_attr_fan1_input.dev_attr.attr, - &sensor_dev_attr_fan1_min.dev_attr.attr, +static const struct attribute *it87_attributes_fan_div[] = { &sensor_dev_attr_fan1_div.dev_attr.attr, - &sensor_dev_attr_fan1_alarm.dev_attr.attr, - NULL -}, { - &sensor_dev_attr_fan2_input.dev_attr.attr, - &sensor_dev_attr_fan2_min.dev_attr.attr, &sensor_dev_attr_fan2_div.dev_attr.attr, - &sensor_dev_attr_fan2_alarm.dev_attr.attr, - NULL -}, { - &sensor_dev_attr_fan3_input.dev_attr.attr, - &sensor_dev_attr_fan3_min.dev_attr.attr, &sensor_dev_attr_fan3_div.dev_attr.attr, - &sensor_dev_attr_fan3_alarm.dev_attr.attr, - NULL -} }; - -static const struct attribute_group it87_group_fan[3] = { - { .attrs = it87_attributes_fan[0] }, - { .attrs = it87_attributes_fan[1] }, - { .attrs = it87_attributes_fan[2] }, }; -static const struct attribute_group * -it87_get_fan_group(const struct it87_data *data) -{ - return has_16bit_fans(data) ? 
it87_group_fan16 : it87_group_fan; -} - static struct attribute *it87_attributes_pwm[3][4+1] = { { &sensor_dev_attr_pwm1_enable.dev_attr.attr, &sensor_dev_attr_pwm1.dev_attr.attr, @@ -1925,7 +1933,6 @@ static void it87_remove_files(struct device *dev) { struct it87_data *data = platform_get_drvdata(pdev); struct it87_sio_data *sio_data = dev->platform_data; - const struct attribute_group *fan_group = it87_get_fan_group(data); int i; sysfs_remove_group(&dev->kobj, &it87_group); @@ -1941,6 +1948,9 @@ static void it87_remove_files(struct device *dev) if (!(data->has_temp & (1 << i))) continue; sysfs_remove_group(&dev->kobj, &it87_group_temp[i]); + if (has_temp_offset(data)) + sysfs_remove_file(&dev->kobj, + it87_attributes_temp_offset[i]); if (sio_data->beep_pin) sysfs_remove_file(&dev->kobj, it87_attributes_temp_beep[i]); @@ -1948,10 +1958,13 @@ static void it87_remove_files(struct device *dev) for (i = 0; i < 5; i++) { if (!(data->has_fan & (1 << i))) continue; - sysfs_remove_group(&dev->kobj, &fan_group[i]); + sysfs_remove_group(&dev->kobj, &it87_group_fan[i]); if (sio_data->beep_pin) sysfs_remove_file(&dev->kobj, it87_attributes_fan_beep[i]); + if (i < 3 && !has_16bit_fans(data)) + sysfs_remove_file(&dev->kobj, + it87_attributes_fan_div[i]); } for (i = 0; i < 3; i++) { if (sio_data->skip_pwm & (1 << 0)) @@ -1972,21 +1985,9 @@ static int it87_probe(struct platform_device *pdev) struct resource *res; struct device *dev = &pdev->dev; struct it87_sio_data *sio_data = dev->platform_data; - const struct attribute_group *fan_group; int err = 0, i; int enable_pwm_interface; int fan_beep_need_rw; - static const char * const names[] = { - "it87", - "it8712", - "it8716", - "it8718", - "it8720", - "it8721", - "it8728", - "it8782", - "it8783", - }; res = platform_get_resource(pdev, IORESOURCE_IO, 0); if (!devm_request_region(&pdev->dev, res->start, IT87_EC_EXTENT, @@ -2003,8 +2004,31 @@ static int it87_probe(struct platform_device *pdev) data->addr = res->start; data->type = sio_data->type; - data->revision = sio_data->revision; - data->name = names[sio_data->type]; + data->features = it87_devices[sio_data->type].features; + data->peci_mask = it87_devices[sio_data->type].peci_mask; + data->old_peci_mask = it87_devices[sio_data->type].old_peci_mask; + data->name = it87_devices[sio_data->type].name; + /* + * IT8705F Datasheet 0.4.1, 3h == Version G. + * IT8712F Datasheet 0.9.1, section 8.3.5 indicates 8h == Version J. + * These are the first revisions with 16-bit tachometer support. + */ + switch (data->type) { + case it87: + if (sio_data->revision >= 0x03) { + data->features &= ~FEAT_OLD_AUTOPWM; + data->features |= FEAT_16BIT_FANS; + } + break; + case it8712: + if (sio_data->revision >= 0x08) { + data->features &= ~FEAT_OLD_AUTOPWM; + data->features |= FEAT_16BIT_FANS; + } + break; + default: + break; + } /* Now, we do the remaining detection. 
*/ if ((it87_read_value(data, IT87_REG_CONFIG) & 0x80) @@ -2068,6 +2092,12 @@ static int it87_probe(struct platform_device *pdev) err = sysfs_create_group(&dev->kobj, &it87_group_temp[i]); if (err) goto error; + if (has_temp_offset(data)) { + err = sysfs_create_file(&dev->kobj, + it87_attributes_temp_offset[i]); + if (err) + goto error; + } if (sio_data->beep_pin) { err = sysfs_create_file(&dev->kobj, it87_attributes_temp_beep[i]); @@ -2077,15 +2107,21 @@ static int it87_probe(struct platform_device *pdev) } /* Do not create fan files for disabled fans */ - fan_group = it87_get_fan_group(data); fan_beep_need_rw = 1; for (i = 0; i < 5; i++) { if (!(data->has_fan & (1 << i))) continue; - err = sysfs_create_group(&dev->kobj, &fan_group[i]); + err = sysfs_create_group(&dev->kobj, &it87_group_fan[i]); if (err) goto error; + if (i < 3 && !has_16bit_fans(data)) { + err = sysfs_create_file(&dev->kobj, + it87_attributes_fan_div[i]); + if (err) + goto error; + } + if (sio_data->beep_pin) { err = sysfs_create_file(&dev->kobj, it87_attributes_fan_beep[i]); @@ -2221,8 +2257,8 @@ static int it87_check_pwm(struct device *dev) * PWM interface). */ if (!((pwm[0] | pwm[1] | pwm[2]) & 0x80)) { - dev_info(dev, "Reconfiguring PWM to " - "active high polarity\n"); + dev_info(dev, + "Reconfiguring PWM to active high polarity\n"); it87_write_value(data, IT87_REG_FAN_CTL, tmp | 0x87); for (i = 0; i < 3; i++) @@ -2232,16 +2268,16 @@ static int it87_check_pwm(struct device *dev) return 1; } - dev_info(dev, "PWM configuration is " - "too broken to be fixed\n"); + dev_info(dev, + "PWM configuration is too broken to be fixed\n"); } - dev_info(dev, "Detected broken BIOS " - "defaults, disabling PWM interface\n"); + dev_info(dev, + "Detected broken BIOS defaults, disabling PWM interface\n"); return 0; } else if (fix_pwm_polarity) { - dev_info(dev, "PWM configuration looks " - "sane, won't touch\n"); + dev_info(dev, + "PWM configuration looks sane, won't touch\n"); } return 1; @@ -2389,42 +2425,46 @@ static struct it87_data *it87_update_device(struct device *dev) it87_read_value(data, IT87_REG_CONFIG) | 0x40); } for (i = 0; i <= 7; i++) { - data->in[i] = + data->in[i][0] = it87_read_value(data, IT87_REG_VIN(i)); - data->in_min[i] = + data->in[i][1] = it87_read_value(data, IT87_REG_VIN_MIN(i)); - data->in_max[i] = + data->in[i][2] = it87_read_value(data, IT87_REG_VIN_MAX(i)); } /* in8 (battery) has no limit registers */ - data->in[8] = it87_read_value(data, IT87_REG_VIN(8)); + data->in[8][0] = it87_read_value(data, IT87_REG_VIN(8)); for (i = 0; i < 5; i++) { /* Skip disabled fans */ if (!(data->has_fan & (1 << i))) continue; - data->fan_min[i] = + data->fan[i][1] = it87_read_value(data, IT87_REG_FAN_MIN[i]); - data->fan[i] = it87_read_value(data, + data->fan[i][0] = it87_read_value(data, IT87_REG_FAN[i]); /* Add high byte if in 16-bit mode */ if (has_16bit_fans(data)) { - data->fan[i] |= it87_read_value(data, + data->fan[i][0] |= it87_read_value(data, IT87_REG_FANX[i]) << 8; - data->fan_min[i] |= it87_read_value(data, + data->fan[i][1] |= it87_read_value(data, IT87_REG_FANX_MIN[i]) << 8; } } for (i = 0; i < 3; i++) { if (!(data->has_temp & (1 << i))) continue; - data->temp[i] = + data->temp[i][0] = it87_read_value(data, IT87_REG_TEMP(i)); - data->temp_high[i] = - it87_read_value(data, IT87_REG_TEMP_HIGH(i)); - data->temp_low[i] = + data->temp[i][1] = it87_read_value(data, IT87_REG_TEMP_LOW(i)); + data->temp[i][2] = + it87_read_value(data, IT87_REG_TEMP_HIGH(i)); + if (has_temp_offset(data)) + data->temp[i][3] = + 
it87_read_value(data, + IT87_REG_TEMP_OFFSET[i]); } /* Newer chips don't have clock dividers */ @@ -2448,6 +2488,7 @@ static struct it87_data *it87_update_device(struct device *dev) it87_update_pwm_ctrl(data, i); data->sensor = it87_read_value(data, IT87_REG_TEMP_ENABLE); + data->extra = it87_read_value(data, IT87_REG_TEMP_EXTRA); /* * The IT8705F does not have VID capability. * The IT8718F and later don't use IT87_REG_VID for the @@ -2549,8 +2590,7 @@ static void __exit sm_it87_exit(void) } -MODULE_AUTHOR("Chris Gauthron, " - "Jean Delvare <khali@linux-fr.org>"); +MODULE_AUTHOR("Chris Gauthron, Jean Delvare <khali@linux-fr.org>"); MODULE_DESCRIPTION("IT8705F/IT871xF/IT872xF hardware monitoring driver"); module_param(update_vbat, bool, 0); MODULE_PARM_DESC(update_vbat, "Update vbat if set else return powerup value"); diff --git a/drivers/hwmon/w83627ehf.c b/drivers/hwmon/w83627ehf.c index 55ac41c05561..0e8ffd6059a0 100644 --- a/drivers/hwmon/w83627ehf.c +++ b/drivers/hwmon/w83627ehf.c @@ -1,7 +1,7 @@ /* * w83627ehf - Driver for the hardware monitoring functionality of * the Winbond W83627EHF Super-I/O chip - * Copyright (C) 2005-2011 Jean Delvare <khali@linux-fr.org> + * Copyright (C) 2005-2012 Jean Delvare <khali@linux-fr.org> * Copyright (C) 2006 Yuan Mu (Winbond), * Rudolf Marek <r.marek@assembler.cz> * David Hubbard <david.c.hubbard@gmail.com> @@ -502,6 +502,13 @@ struct w83627ehf_data { u16 have_temp_offset; u8 in6_skip:1; u8 temp3_val_only:1; + +#ifdef CONFIG_PM + /* Remember extra register values over suspend/resume */ + u8 vbat; + u8 fandiv1; + u8 fandiv2; +#endif }; struct w83627ehf_sio_data { @@ -898,6 +905,8 @@ static struct w83627ehf_data *w83627ehf_update_device(struct device *dev) data->temp_max_hyst[i] = w83627ehf_read_temp(data, data->reg_temp_hyst[i]); + if (i > 2) + continue; if (data->have_temp_offset & (1 << i)) data->temp_offset[i] = w83627ehf_read_value(data, @@ -2608,10 +2617,98 @@ static int w83627ehf_remove(struct platform_device *pdev) return 0; } +#ifdef CONFIG_PM +static int w83627ehf_suspend(struct device *dev) +{ + struct w83627ehf_data *data = w83627ehf_update_device(dev); + struct w83627ehf_sio_data *sio_data = dev->platform_data; + + mutex_lock(&data->update_lock); + data->vbat = w83627ehf_read_value(data, W83627EHF_REG_VBAT); + if (sio_data->kind == nct6775) { + data->fandiv1 = w83627ehf_read_value(data, NCT6775_REG_FANDIV1); + data->fandiv2 = w83627ehf_read_value(data, NCT6775_REG_FANDIV2); + } + mutex_unlock(&data->update_lock); + + return 0; +} + +static int w83627ehf_resume(struct device *dev) +{ + struct w83627ehf_data *data = dev_get_drvdata(dev); + struct w83627ehf_sio_data *sio_data = dev->platform_data; + int i; + + mutex_lock(&data->update_lock); + data->bank = 0xff; /* Force initial bank selection */ + + /* Restore limits */ + for (i = 0; i < data->in_num; i++) { + if ((i == 6) && data->in6_skip) + continue; + + w83627ehf_write_value(data, W83627EHF_REG_IN_MIN(i), + data->in_min[i]); + w83627ehf_write_value(data, W83627EHF_REG_IN_MAX(i), + data->in_max[i]); + } + + for (i = 0; i < 5; i++) { + if (!(data->has_fan_min & (1 << i))) + continue; + + w83627ehf_write_value(data, data->REG_FAN_MIN[i], + data->fan_min[i]); + } + + for (i = 0; i < NUM_REG_TEMP; i++) { + if (!(data->have_temp & (1 << i))) + continue; + + if (data->reg_temp_over[i]) + w83627ehf_write_temp(data, data->reg_temp_over[i], + data->temp_max[i]); + if (data->reg_temp_hyst[i]) + w83627ehf_write_temp(data, data->reg_temp_hyst[i], + data->temp_max_hyst[i]); + if (i > 2) + continue; + 
if (data->have_temp_offset & (1 << i))
+			w83627ehf_write_value(data,
+					      W83627EHF_REG_TEMP_OFFSET[i],
+					      data->temp_offset[i]);
+	}
+
+	/* Restore other settings */
+	w83627ehf_write_value(data, W83627EHF_REG_VBAT, data->vbat);
+	if (sio_data->kind == nct6775) {
+		w83627ehf_write_value(data, NCT6775_REG_FANDIV1, data->fandiv1);
+		w83627ehf_write_value(data, NCT6775_REG_FANDIV2, data->fandiv2);
+	}
+
+	/* Force re-reading all values */
+	data->valid = 0;
+	mutex_unlock(&data->update_lock);
+
+	return 0;
+}
+
+static const struct dev_pm_ops w83627ehf_dev_pm_ops = {
+	.suspend = w83627ehf_suspend,
+	.resume = w83627ehf_resume,
+};
+
+#define W83627EHF_DEV_PM_OPS	(&w83627ehf_dev_pm_ops)
+#else
+#define W83627EHF_DEV_PM_OPS	NULL
+#endif /* CONFIG_PM */
+
 static struct platform_driver w83627ehf_driver = {
 	.driver = {
 		.owner	= THIS_MODULE,
 		.name	= DRVNAME,
+		.pm	= W83627EHF_DEV_PM_OPS,
 	},
 	.probe		= w83627ehf_probe,
 	.remove		= w83627ehf_remove,
diff --git a/drivers/hwmon/w83627hf.c b/drivers/hwmon/w83627hf.c
index 7f68b8309d10..81f486520cea 100644
--- a/drivers/hwmon/w83627hf.c
+++ b/drivers/hwmon/w83627hf.c
@@ -5,7 +5,7 @@
  *	Philip Edelbrock <phil@netroedge.com>,
  *	and Mark Studebaker <mdsxyz123@yahoo.com>
  *	Ported to 2.6 by Bernhard C. Schrenk <clemy@clemy.org>
- *	Copyright (c) 2007  Jean Delvare <khali@linux-fr.org>
+ *	Copyright (c) 2007 - 2012  Jean Delvare <khali@linux-fr.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
@@ -389,6 +389,12 @@ struct w83627hf_data {
	 */
	u8 vrm;
	u8 vrm_ovt;		/* Register value, 627THF/637HF/687THF only */
+
+#ifdef CONFIG_PM
+	/* Remember extra register values over suspend/resume */
+	u8 scfg1;
+	u8 scfg2;
+#endif
 };
 
 
@@ -401,10 +407,77 @@ static void w83627hf_update_fan_div(struct w83627hf_data *data);
 static struct w83627hf_data *w83627hf_update_device(struct device *dev);
 static void w83627hf_init_device(struct platform_device *pdev);
 
+#ifdef CONFIG_PM
+static int w83627hf_suspend(struct device *dev)
+{
+	struct w83627hf_data *data = w83627hf_update_device(dev);
+
+	mutex_lock(&data->update_lock);
+	data->scfg1 = w83627hf_read_value(data, W83781D_REG_SCFG1);
+	data->scfg2 = w83627hf_read_value(data, W83781D_REG_SCFG2);
+	mutex_unlock(&data->update_lock);
+
+	return 0;
+}
+
+static int w83627hf_resume(struct device *dev)
+{
+	struct w83627hf_data *data = dev_get_drvdata(dev);
+	int i, num_temps = (data->type == w83697hf) ?
2 : 3;
+
+	/* Restore limits */
+	mutex_lock(&data->update_lock);
+	for (i = 0; i <= 8; i++) {
+		/* skip missing sensors */
+		if (((data->type == w83697hf) && (i == 1)) ||
+		    ((data->type != w83627hf && data->type != w83697hf)
+		      && (i == 5 || i == 6)))
+			continue;
+		w83627hf_write_value(data, W83781D_REG_IN_MAX(i),
+				     data->in_max[i]);
+		w83627hf_write_value(data, W83781D_REG_IN_MIN(i),
+				     data->in_min[i]);
+	}
+	for (i = 0; i <= 2; i++)
+		w83627hf_write_value(data, W83627HF_REG_FAN_MIN(i),
+				     data->fan_min[i]);
+	for (i = 0; i < num_temps; i++) {
+		w83627hf_write_value(data, w83627hf_reg_temp_over[i],
+				     data->temp_max[i]);
+		w83627hf_write_value(data, w83627hf_reg_temp_hyst[i],
+				     data->temp_max_hyst[i]);
+	}
+
+	/* Fixup BIOS bugs */
+	if (data->type == w83627thf || data->type == w83637hf ||
+	    data->type == w83687thf)
+		w83627hf_write_value(data, W83627THF_REG_VRM_OVT_CFG,
+				     data->vrm_ovt);
+	w83627hf_write_value(data, W83781D_REG_SCFG1, data->scfg1);
+	w83627hf_write_value(data, W83781D_REG_SCFG2, data->scfg2);
+
+	/* Force re-reading all values */
+	data->valid = 0;
+	mutex_unlock(&data->update_lock);
+
+	return 0;
+}
+
+static const struct dev_pm_ops w83627hf_dev_pm_ops = {
+	.suspend = w83627hf_suspend,
+	.resume = w83627hf_resume,
+};
+
+#define W83627HF_DEV_PM_OPS	(&w83627hf_dev_pm_ops)
+#else
+#define W83627HF_DEV_PM_OPS	NULL
+#endif /* CONFIG_PM */
+
 static struct platform_driver w83627hf_driver = {
 	.driver = {
 		.owner	= THIS_MODULE,
 		.name	= DRVNAME,
+		.pm	= W83627HF_DEV_PM_OPS,
 	},
 	.probe		= w83627hf_probe,
 	.remove		= w83627hf_remove,
@@ -1659,8 +1732,10 @@ static void w83627hf_init_device(struct platform_device *pdev)
 	/* Minimize conflicts with other winbond i2c-only clients...  */
 	/* disable i2c subclients... how to disable main i2c client?? */
 	/* force i2c address to relatively uncommon address */
-	w83627hf_write_value(data, W83781D_REG_I2C_SUBADDR, 0x89);
-	w83627hf_write_value(data, W83781D_REG_I2C_ADDR, force_i2c);
+	if (type == w83627hf) {
+		w83627hf_write_value(data, W83781D_REG_I2C_SUBADDR, 0x89);
+		w83627hf_write_value(data, W83781D_REG_I2C_ADDR, force_i2c);
+	}
 
 	/* Read VID only once */
 	if (type == w83627hf || type == w83637hf) {
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 5de86968379d..c13745cde7fa 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -38,10 +38,12 @@
 #include <linux/inetdevice.h>
 #include <linux/ip.h>
 #include <linux/tcp.h>
+#include <linux/if_vlan.h>
 
 #include <net/neighbour.h>
 #include <net/netevent.h>
 #include <net/route.h>
+#include <net/tcp.h>
 
 #include "iw_cxgb4.h"
 
@@ -61,6 +63,14 @@ static char *states[] = {
 	NULL,
 };
 
+static int nocong;
+module_param(nocong, int, 0644);
+MODULE_PARM_DESC(nocong, "Turn off congestion control (default=0)");
+
+static int enable_ecn;
+module_param(enable_ecn, int, 0644);
+MODULE_PARM_DESC(enable_ecn, "Enable ECN (default=0/disabled)");
+
 static int dack_mode = 1;
 module_param(dack_mode, int, 0644);
 MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=1)");
@@ -265,6 +275,7 @@ void _c4iw_free_ep(struct kref *kref)
 		cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid);
 		dst_release(ep->dst);
 		cxgb4_l2t_release(ep->l2t);
+		remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid);
 	}
 	kfree(ep);
 }
@@ -441,6 +452,50 @@ static int send_abort(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp)
 	return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
 }
 
+#define VLAN_NONE 0xfff
+#define FILTER_SEL_VLAN_NONE 0xffff
+#define FILTER_SEL_WIDTH_P_FC
(3+1) /* port uses 3 bits, FCoE one bit */ +#define FILTER_SEL_WIDTH_VIN_P_FC \ + (6 + 7 + FILTER_SEL_WIDTH_P_FC) /* 6 bits are unused, VF uses 7 bits*/ +#define FILTER_SEL_WIDTH_TAG_P_FC \ + (3 + FILTER_SEL_WIDTH_VIN_P_FC) /* PF uses 3 bits */ +#define FILTER_SEL_WIDTH_VLD_TAG_P_FC (1 + FILTER_SEL_WIDTH_TAG_P_FC) + +static unsigned int select_ntuple(struct c4iw_dev *dev, struct dst_entry *dst, + struct l2t_entry *l2t) +{ + unsigned int ntuple = 0; + u32 viid; + + switch (dev->rdev.lldi.filt_mode) { + + /* default filter mode */ + case HW_TPL_FR_MT_PR_IV_P_FC: + if (l2t->vlan == VLAN_NONE) + ntuple |= FILTER_SEL_VLAN_NONE << FILTER_SEL_WIDTH_P_FC; + else { + ntuple |= l2t->vlan << FILTER_SEL_WIDTH_P_FC; + ntuple |= 1 << FILTER_SEL_WIDTH_VLD_TAG_P_FC; + } + ntuple |= l2t->lport << S_PORT | IPPROTO_TCP << + FILTER_SEL_WIDTH_VLD_TAG_P_FC; + break; + case HW_TPL_FR_MT_PR_OV_P_FC: { + viid = cxgb4_port_viid(l2t->neigh->dev); + + ntuple |= FW_VIID_VIN_GET(viid) << FILTER_SEL_WIDTH_P_FC; + ntuple |= FW_VIID_PFN_GET(viid) << FILTER_SEL_WIDTH_VIN_P_FC; + ntuple |= FW_VIID_VIVLD_GET(viid) << FILTER_SEL_WIDTH_TAG_P_FC; + ntuple |= l2t->lport << S_PORT | IPPROTO_TCP << + FILTER_SEL_WIDTH_VLD_TAG_P_FC; + break; + } + default: + break; + } + return ntuple; +} + static int send_connect(struct c4iw_ep *ep) { struct cpl_act_open_req *req; @@ -463,7 +518,8 @@ static int send_connect(struct c4iw_ep *ep) cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx); wscale = compute_wscale(rcv_win); - opt0 = KEEP_ALIVE(1) | + opt0 = (nocong ? NO_CONG(1) : 0) | + KEEP_ALIVE(1) | DELACK(1) | WND_SCALE(wscale) | MSS_IDX(mtu_idx) | @@ -474,6 +530,7 @@ static int send_connect(struct c4iw_ep *ep) ULP_MODE(ULP_MODE_TCPDDP) | RCV_BUFSIZ(rcv_win>>10); opt2 = RX_CHANNEL(0) | + CCTRL_ECN(enable_ecn) | RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid); if (enable_tcp_timestamps) opt2 |= TSTAMPS_EN(1); @@ -492,8 +549,9 @@ static int send_connect(struct c4iw_ep *ep) req->local_ip = ep->com.local_addr.sin_addr.s_addr; req->peer_ip = ep->com.remote_addr.sin_addr.s_addr; req->opt0 = cpu_to_be64(opt0); - req->params = 0; + req->params = cpu_to_be32(select_ntuple(ep->com.dev, ep->dst, ep->l2t)); req->opt2 = cpu_to_be32(opt2); + set_bit(ACT_OPEN_REQ, &ep->com.history); return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); } @@ -770,6 +828,7 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) /* setup the hwtid for this connection */ ep->hwtid = tid; cxgb4_insert_tid(t, ep, tid); + insert_handle(dev, &dev->hwtid_idr, ep, ep->hwtid); ep->snd_seq = be32_to_cpu(req->snd_isn); ep->rcv_seq = be32_to_cpu(req->rcv_isn); @@ -777,7 +836,9 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) set_emss(ep, ntohs(req->tcp_opt)); /* dealloc the atid */ + remove_handle(ep->com.dev, &ep->com.dev->atid_idr, atid); cxgb4_free_atid(t, atid); + set_bit(ACT_ESTAB, &ep->com.history); /* start MPA negotiation */ send_flowc(ep, NULL); @@ -803,6 +864,7 @@ static void close_complete_upcall(struct c4iw_ep *ep) ep->com.cm_id->rem_ref(ep->com.cm_id); ep->com.cm_id = NULL; ep->com.qp = NULL; + set_bit(CLOSE_UPCALL, &ep->com.history); } } @@ -811,6 +873,7 @@ static int abort_connection(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp) PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); close_complete_upcall(ep); state_set(&ep->com, ABORTING); + set_bit(ABORT_CONN, &ep->com.history); return send_abort(ep, skb, gfp); } @@ -825,6 +888,7 @@ static void peer_close_upcall(struct c4iw_ep *ep) PDBG("peer close delivered ep %p cm_id %p 
tid %u\n", ep, ep->com.cm_id, ep->hwtid); ep->com.cm_id->event_handler(ep->com.cm_id, &event); + set_bit(DISCONN_UPCALL, &ep->com.history); } } @@ -843,6 +907,7 @@ static void peer_abort_upcall(struct c4iw_ep *ep) ep->com.cm_id->rem_ref(ep->com.cm_id); ep->com.cm_id = NULL; ep->com.qp = NULL; + set_bit(ABORT_UPCALL, &ep->com.history); } } @@ -875,6 +940,7 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status) PDBG("%s ep %p tid %u status %d\n", __func__, ep, ep->hwtid, status); + set_bit(CONN_RPL_UPCALL, &ep->com.history); ep->com.cm_id->event_handler(ep->com.cm_id, &event); if (status < 0) { @@ -915,6 +981,7 @@ static void connect_request_upcall(struct c4iw_ep *ep) ep->parent_ep->com.cm_id, &event); } + set_bit(CONNREQ_UPCALL, &ep->com.history); c4iw_put_ep(&ep->parent_ep->com); ep->parent_ep = NULL; } @@ -931,6 +998,7 @@ static void established_upcall(struct c4iw_ep *ep) if (ep->com.cm_id) { PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); ep->com.cm_id->event_handler(ep->com.cm_id, &event); + set_bit(ESTAB_UPCALL, &ep->com.history); } } @@ -1316,6 +1384,7 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) unsigned int dlen = ntohs(hdr->len); unsigned int tid = GET_TID(hdr); struct tid_info *t = dev->rdev.lldi.tids; + __u8 status = hdr->status; ep = lookup_tid(t, tid); PDBG("%s ep %p tid %u dlen %u\n", __func__, ep, ep->hwtid, dlen); @@ -1338,9 +1407,9 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) case MPA_REP_SENT: break; default: - printk(KERN_ERR MOD "%s Unexpected streaming data." - " ep %p state %d tid %u\n", - __func__, ep, state_read(&ep->com), ep->hwtid); + pr_err("%s Unexpected streaming data." \ + " ep %p state %d tid %u status %d\n", + __func__, ep, state_read(&ep->com), ep->hwtid, status); /* * The ep will timeout and inform the ULP of the failure. @@ -1383,6 +1452,63 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb) return 0; } +static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) +{ + struct sk_buff *skb; + struct fw_ofld_connection_wr *req; + unsigned int mtu_idx; + int wscale; + + skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); + req = (struct fw_ofld_connection_wr *)__skb_put(skb, sizeof(*req)); + memset(req, 0, sizeof(*req)); + req->op_compl = htonl(V_WR_OP(FW_OFLD_CONNECTION_WR)); + req->len16_pkd = htonl(FW_WR_LEN16(DIV_ROUND_UP(sizeof(*req), 16))); + req->le.filter = cpu_to_be32(select_ntuple(ep->com.dev, ep->dst, + ep->l2t)); + req->le.lport = ep->com.local_addr.sin_port; + req->le.pport = ep->com.remote_addr.sin_port; + req->le.u.ipv4.lip = ep->com.local_addr.sin_addr.s_addr; + req->le.u.ipv4.pip = ep->com.remote_addr.sin_addr.s_addr; + req->tcb.t_state_to_astid = + htonl(V_FW_OFLD_CONNECTION_WR_T_STATE(TCP_SYN_SENT) | + V_FW_OFLD_CONNECTION_WR_ASTID(atid)); + req->tcb.cplrxdataack_cplpassacceptrpl = + htons(F_FW_OFLD_CONNECTION_WR_CPLRXDATAACK); + req->tcb.tx_max = jiffies; + req->tcb.rcv_adv = htons(1); + cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx); + wscale = compute_wscale(rcv_win); + req->tcb.opt0 = TCAM_BYPASS(1) | + (nocong ? 
NO_CONG(1) : 0) | + KEEP_ALIVE(1) | + DELACK(1) | + WND_SCALE(wscale) | + MSS_IDX(mtu_idx) | + L2T_IDX(ep->l2t->idx) | + TX_CHAN(ep->tx_chan) | + SMAC_SEL(ep->smac_idx) | + DSCP(ep->tos) | + ULP_MODE(ULP_MODE_TCPDDP) | + RCV_BUFSIZ(rcv_win >> 10); + req->tcb.opt2 = PACE(1) | + TX_QUEUE(ep->com.dev->rdev.lldi.tx_modq[ep->tx_chan]) | + RX_CHANNEL(0) | + CCTRL_ECN(enable_ecn) | + RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid); + if (enable_tcp_timestamps) + req->tcb.opt2 |= TSTAMPS_EN(1); + if (enable_tcp_sack) + req->tcb.opt2 |= SACK_EN(1); + if (wscale && enable_tcp_window_scaling) + req->tcb.opt2 |= WND_SCALE_EN(1); + req->tcb.opt0 = cpu_to_be64(req->tcb.opt0); + req->tcb.opt2 = cpu_to_be32(req->tcb.opt2); + set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx); + set_bit(ACT_OFLD_CONN, &ep->com.history); + c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); +} + /* * Return whether a failed active open has allocated a TID */ @@ -1392,6 +1518,111 @@ static inline int act_open_has_tid(int status) status != CPL_ERR_ARP_MISS; } +#define ACT_OPEN_RETRY_COUNT 2 + +static int c4iw_reconnect(struct c4iw_ep *ep) +{ + int err = 0; + struct rtable *rt; + struct port_info *pi; + struct net_device *pdev; + int step; + struct neighbour *neigh; + + PDBG("%s qp %p cm_id %p\n", __func__, ep->com.qp, ep->com.cm_id); + init_timer(&ep->timer); + + /* + * Allocate an active TID to initiate a TCP connection. + */ + ep->atid = cxgb4_alloc_atid(ep->com.dev->rdev.lldi.tids, ep); + if (ep->atid == -1) { + pr_err("%s - cannot alloc atid.\n", __func__); + err = -ENOMEM; + goto fail2; + } + insert_handle(ep->com.dev, &ep->com.dev->atid_idr, ep, ep->atid); + + /* find a route */ + rt = find_route(ep->com.dev, + ep->com.cm_id->local_addr.sin_addr.s_addr, + ep->com.cm_id->remote_addr.sin_addr.s_addr, + ep->com.cm_id->local_addr.sin_port, + ep->com.cm_id->remote_addr.sin_port, 0); + if (!rt) { + pr_err("%s - cannot find route.\n", __func__); + err = -EHOSTUNREACH; + goto fail3; + } + ep->dst = &rt->dst; + + neigh = dst_neigh_lookup(ep->dst, + &ep->com.cm_id->remote_addr.sin_addr.s_addr); + /* get a l2t entry */ + if (neigh->dev->flags & IFF_LOOPBACK) { + PDBG("%s LOOPBACK\n", __func__); + pdev = ip_dev_find(&init_net, + ep->com.cm_id->remote_addr.sin_addr.s_addr); + ep->l2t = cxgb4_l2t_get(ep->com.dev->rdev.lldi.l2t, + neigh, pdev, 0); + pi = (struct port_info *)netdev_priv(pdev); + ep->mtu = pdev->mtu; + ep->tx_chan = cxgb4_port_chan(pdev); + ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1; + dev_put(pdev); + } else { + ep->l2t = cxgb4_l2t_get(ep->com.dev->rdev.lldi.l2t, + neigh, neigh->dev, 0); + pi = (struct port_info *)netdev_priv(neigh->dev); + ep->mtu = dst_mtu(ep->dst); + ep->tx_chan = cxgb4_port_chan(neigh->dev); + ep->smac_idx = (cxgb4_port_viid(neigh->dev) & + 0x7F) << 1; + } + + step = ep->com.dev->rdev.lldi.ntxq / ep->com.dev->rdev.lldi.nchan; + ep->txq_idx = pi->port_id * step; + ep->ctrlq_idx = pi->port_id; + step = ep->com.dev->rdev.lldi.nrxq / ep->com.dev->rdev.lldi.nchan; + ep->rss_qid = ep->com.dev->rdev.lldi.rxq_ids[pi->port_id * step]; + + if (!ep->l2t) { + pr_err("%s - cannot alloc l2e.\n", __func__); + err = -ENOMEM; + goto fail4; + } + + PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n", + __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid, + ep->l2t->idx); + + state_set(&ep->com, CONNECTING); + ep->tos = 0; + + /* send connect request to rnic */ + err = send_connect(ep); + if (!err) + goto out; + + cxgb4_l2t_release(ep->l2t); +fail4: + dst_release(ep->dst); +fail3: + 
remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid); + cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); +fail2: + /* + * remember to send notification to upper layer. + * We are in here so the upper layer is not aware that this is + * re-connect attempt and so, upper layer is still waiting for + * response of 1st connect request. + */ + connect_reply_upcall(ep, -ECONNRESET); + c4iw_put_ep(&ep->com); +out: + return err; +} + static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) { struct c4iw_ep *ep; @@ -1412,6 +1643,8 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) return 0; } + set_bit(ACT_OPEN_RPL, &ep->com.history); + /* * Log interesting failures. */ @@ -1419,6 +1652,29 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) case CPL_ERR_CONN_RESET: case CPL_ERR_CONN_TIMEDOUT: break; + case CPL_ERR_TCAM_FULL: + if (dev->rdev.lldi.enable_fw_ofld_conn) { + mutex_lock(&dev->rdev.stats.lock); + dev->rdev.stats.tcam_full++; + mutex_unlock(&dev->rdev.stats.lock); + send_fw_act_open_req(ep, + GET_TID_TID(GET_AOPEN_ATID( + ntohl(rpl->atid_status)))); + return 0; + } + break; + case CPL_ERR_CONN_EXIST: + if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) { + set_bit(ACT_RETRY_INUSE, &ep->com.history); + remove_handle(ep->com.dev, &ep->com.dev->atid_idr, + atid); + cxgb4_free_atid(t, atid); + dst_release(ep->dst); + cxgb4_l2t_release(ep->l2t); + c4iw_reconnect(ep); + return 0; + } + break; default: printk(KERN_INFO MOD "Active open failure - " "atid %u status %u errno %d %pI4:%u->%pI4:%u\n", @@ -1436,6 +1692,7 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) if (status && act_open_has_tid(status)) cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, GET_TID(rpl)); + remove_handle(ep->com.dev, &ep->com.dev->atid_idr, atid); cxgb4_free_atid(t, atid); dst_release(ep->dst); cxgb4_l2t_release(ep->l2t); @@ -1452,13 +1709,14 @@ static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) struct c4iw_listen_ep *ep = lookup_stid(t, stid); if (!ep) { - printk(KERN_ERR MOD "stid %d lookup failure!\n", stid); - return 0; + PDBG("%s stid %d lookup failure!\n", __func__, stid); + goto out; } PDBG("%s ep %p status %d error %d\n", __func__, ep, rpl->status, status2errno(rpl->status)); c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status)); +out: return 0; } @@ -1510,14 +1768,15 @@ static void accept_cr(struct c4iw_ep *ep, __be32 peer_ip, struct sk_buff *skb, skb_get(skb); cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx); wscale = compute_wscale(rcv_win); - opt0 = KEEP_ALIVE(1) | + opt0 = (nocong ? 
NO_CONG(1) : 0) | + KEEP_ALIVE(1) | DELACK(1) | WND_SCALE(wscale) | MSS_IDX(mtu_idx) | L2T_IDX(ep->l2t->idx) | TX_CHAN(ep->tx_chan) | SMAC_SEL(ep->smac_idx) | - DSCP(ep->tos) | + DSCP(ep->tos >> 2) | ULP_MODE(ULP_MODE_TCPDDP) | RCV_BUFSIZ(rcv_win>>10); opt2 = RX_CHANNEL(0) | @@ -1529,6 +1788,15 @@ static void accept_cr(struct c4iw_ep *ep, __be32 peer_ip, struct sk_buff *skb, opt2 |= SACK_EN(1); if (wscale && enable_tcp_window_scaling) opt2 |= WND_SCALE_EN(1); + if (enable_ecn) { + const struct tcphdr *tcph; + u32 hlen = ntohl(req->hdr_len); + + tcph = (const void *)(req + 1) + G_ETH_HDR_LEN(hlen) + + G_IP_HDR_LEN(hlen); + if (tcph->ece && tcph->cwr) + opt2 |= CCTRL_ECN(1); + } rpl = cplhdr(skb); INIT_TP_WR(rpl, ep->hwtid); @@ -1645,22 +1913,30 @@ out: static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) { - struct c4iw_ep *child_ep, *parent_ep; + struct c4iw_ep *child_ep = NULL, *parent_ep; struct cpl_pass_accept_req *req = cplhdr(skb); unsigned int stid = GET_POPEN_TID(ntohl(req->tos_stid)); struct tid_info *t = dev->rdev.lldi.tids; unsigned int hwtid = GET_TID(req); struct dst_entry *dst; struct rtable *rt; - __be32 local_ip, peer_ip; + __be32 local_ip, peer_ip = 0; __be16 local_port, peer_port; int err; + u16 peer_mss = ntohs(req->tcpopt.mss); parent_ep = lookup_stid(t, stid); - PDBG("%s parent ep %p tid %u\n", __func__, parent_ep, hwtid); - + if (!parent_ep) { + PDBG("%s connect request on invalid stid %d\n", __func__, stid); + goto reject; + } get_4tuple(req, &local_ip, &peer_ip, &local_port, &peer_port); + PDBG("%s parent ep %p hwtid %u laddr 0x%x raddr 0x%x lport %d " \ + "rport %d peer_mss %d\n", __func__, parent_ep, hwtid, + ntohl(local_ip), ntohl(peer_ip), ntohs(local_port), + ntohs(peer_port), peer_mss); + if (state_read(&parent_ep->com) != LISTEN) { printk(KERN_ERR "%s - listening ep not in LISTEN\n", __func__); @@ -1694,6 +1970,9 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) goto reject; } + if (peer_mss && child_ep->mtu > (peer_mss + 40)) + child_ep->mtu = peer_mss + 40; + state_set(&child_ep->com, CONNECTING); child_ep->com.dev = dev; child_ep->com.cm_id = NULL; @@ -1715,6 +1994,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) init_timer(&child_ep->timer); cxgb4_insert_tid(t, child_ep, hwtid); accept_cr(child_ep, peer_ip, skb, req); + set_bit(PASS_ACCEPT_REQ, &child_ep->com.history); goto out; reject: reject_cr(dev, hwtid, peer_ip, skb); @@ -1734,12 +2014,17 @@ static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb) ep->snd_seq = be32_to_cpu(req->snd_isn); ep->rcv_seq = be32_to_cpu(req->rcv_isn); + PDBG("%s ep %p hwtid %u tcp_opt 0x%02x\n", __func__, ep, tid, + ntohs(req->tcp_opt)); + set_emss(ep, ntohs(req->tcp_opt)); + insert_handle(dev, &dev->hwtid_idr, ep, ep->hwtid); dst_confirm(ep->dst); state_set(&ep->com, MPA_REQ_WAIT); start_ep_timer(ep); send_flowc(ep, skb); + set_bit(PASS_ESTAB, &ep->com.history); return 0; } @@ -1759,6 +2044,7 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb) PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); dst_confirm(ep->dst); + set_bit(PEER_CLOSE, &ep->com.history); mutex_lock(&ep->com.mutex); switch (ep->com.state) { case MPA_REQ_WAIT: @@ -1838,74 +2124,6 @@ static int is_neg_adv_abort(unsigned int status) status == CPL_ERR_PERSIST_NEG_ADVICE; } -static int c4iw_reconnect(struct c4iw_ep *ep) -{ - struct rtable *rt; - int err = 0; - - PDBG("%s qp %p cm_id %p\n", __func__, ep->com.qp, ep->com.cm_id); - init_timer(&ep->timer); - - /* - * 
Allocate an active TID to initiate a TCP connection. - */ - ep->atid = cxgb4_alloc_atid(ep->com.dev->rdev.lldi.tids, ep); - if (ep->atid == -1) { - printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__); - err = -ENOMEM; - goto fail2; - } - - /* find a route */ - rt = find_route(ep->com.dev, - ep->com.cm_id->local_addr.sin_addr.s_addr, - ep->com.cm_id->remote_addr.sin_addr.s_addr, - ep->com.cm_id->local_addr.sin_port, - ep->com.cm_id->remote_addr.sin_port, 0); - if (!rt) { - printk(KERN_ERR MOD "%s - cannot find route.\n", __func__); - err = -EHOSTUNREACH; - goto fail3; - } - ep->dst = &rt->dst; - - err = import_ep(ep, ep->com.cm_id->remote_addr.sin_addr.s_addr, - ep->dst, ep->com.dev, false); - if (err) { - printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__); - goto fail4; - } - - PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n", - __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid, - ep->l2t->idx); - - state_set(&ep->com, CONNECTING); - ep->tos = 0; - - /* send connect request to rnic */ - err = send_connect(ep); - if (!err) - goto out; - - cxgb4_l2t_release(ep->l2t); -fail4: - dst_release(ep->dst); -fail3: - cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); -fail2: - /* - * remember to send notification to upper layer. - * We are in here so the upper layer is not aware that this is - * re-connect attempt and so, upper layer is still waiting for - * response of 1st connect request. - */ - connect_reply_upcall(ep, -ECONNRESET); - c4iw_put_ep(&ep->com); -out: - return err; -} - static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) { struct cpl_abort_req_rss *req = cplhdr(skb); @@ -1926,6 +2144,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) } PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid, ep->com.state); + set_bit(PEER_ABORT, &ep->com.history); /* * Wake up any threads in rdma_init() or rdma_fini(). @@ -2140,6 +2359,7 @@ int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) c4iw_put_ep(&ep->com); return -ECONNRESET; } + set_bit(ULP_REJECT, &ep->com.history); BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); if (mpa_rev == 0) abort_connection(ep, NULL, GFP_KERNEL); @@ -2169,6 +2389,7 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); BUG_ON(!qp); + set_bit(ULP_ACCEPT, &ep->com.history); if ((conn_param->ord > c4iw_max_read_depth) || (conn_param->ird > c4iw_max_read_depth)) { abort_connection(ep, NULL, GFP_KERNEL); @@ -2292,6 +2513,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) err = -ENOMEM; goto fail2; } + insert_handle(dev, &dev->atid_idr, ep, ep->atid); PDBG("%s saddr 0x%x sport 0x%x raddr 0x%x rport 0x%x\n", __func__, ntohl(cm_id->local_addr.sin_addr.s_addr), @@ -2337,6 +2559,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) fail4: dst_release(ep->dst); fail3: + remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid); cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); fail2: cm_id->rem_ref(cm_id); @@ -2351,7 +2574,6 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) struct c4iw_dev *dev = to_c4iw_dev(cm_id->device); struct c4iw_listen_ep *ep; - might_sleep(); ep = alloc_ep(sizeof(*ep), GFP_KERNEL); @@ -2370,30 +2592,54 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) /* * Allocate a server TID. 
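	 * When the LLD advertises enable_fw_ofld_conn, a server filter TID
	 * (sftid) is allocated instead of a regular server TID, so that
	 * incoming SYNs are steered through a filter entry and handled by
	 * rx_pkt() via a firmware offload-connection work request.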
*/ - ep->stid = cxgb4_alloc_stid(dev->rdev.lldi.tids, PF_INET, ep); + if (dev->rdev.lldi.enable_fw_ofld_conn) + ep->stid = cxgb4_alloc_sftid(dev->rdev.lldi.tids, PF_INET, ep); + else + ep->stid = cxgb4_alloc_stid(dev->rdev.lldi.tids, PF_INET, ep); + if (ep->stid == -1) { printk(KERN_ERR MOD "%s - cannot alloc stid.\n", __func__); err = -ENOMEM; goto fail2; } - + insert_handle(dev, &dev->stid_idr, ep, ep->stid); state_set(&ep->com, LISTEN); - c4iw_init_wr_wait(&ep->com.wr_wait); - err = cxgb4_create_server(ep->com.dev->rdev.lldi.ports[0], ep->stid, - ep->com.local_addr.sin_addr.s_addr, - ep->com.local_addr.sin_port, - ep->com.dev->rdev.lldi.rxq_ids[0]); - if (err) - goto fail3; - - /* wait for pass_open_rpl */ - err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait, 0, 0, - __func__); + if (dev->rdev.lldi.enable_fw_ofld_conn) { + do { + err = cxgb4_create_server_filter( + ep->com.dev->rdev.lldi.ports[0], ep->stid, + ep->com.local_addr.sin_addr.s_addr, + ep->com.local_addr.sin_port, + 0, + ep->com.dev->rdev.lldi.rxq_ids[0], + 0, + 0); + if (err == -EBUSY) { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(usecs_to_jiffies(100)); + } + } while (err == -EBUSY); + } else { + c4iw_init_wr_wait(&ep->com.wr_wait); + err = cxgb4_create_server(ep->com.dev->rdev.lldi.ports[0], + ep->stid, ep->com.local_addr.sin_addr.s_addr, + ep->com.local_addr.sin_port, + 0, + ep->com.dev->rdev.lldi.rxq_ids[0]); + if (!err) + err = c4iw_wait_for_reply(&ep->com.dev->rdev, + &ep->com.wr_wait, + 0, 0, __func__); + } if (!err) { cm_id->provider_data = ep; goto out; } -fail3: + pr_err("%s cxgb4_create_server/filter failed err %d " \ + "stid %d laddr %08x lport %d\n", \ + __func__, err, ep->stid, + ntohl(ep->com.local_addr.sin_addr.s_addr), + ntohs(ep->com.local_addr.sin_port)); cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, PF_INET); fail2: cm_id->rem_ref(cm_id); @@ -2412,12 +2658,19 @@ int c4iw_destroy_listen(struct iw_cm_id *cm_id) might_sleep(); state_set(&ep->com, DEAD); - c4iw_init_wr_wait(&ep->com.wr_wait); - err = listen_stop(ep); - if (err) - goto done; - err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait, 0, 0, - __func__); + if (ep->com.dev->rdev.lldi.enable_fw_ofld_conn) { + err = cxgb4_remove_server_filter( + ep->com.dev->rdev.lldi.ports[0], ep->stid, + ep->com.dev->rdev.lldi.rxq_ids[0], 0); + } else { + c4iw_init_wr_wait(&ep->com.wr_wait); + err = listen_stop(ep); + if (err) + goto done; + err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait, + 0, 0, __func__); + } + remove_handle(ep->com.dev, &ep->com.dev->stid_idr, ep->stid); cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, PF_INET); done: cm_id->rem_ref(cm_id); @@ -2481,10 +2734,13 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) if (close) { if (abrupt) { + set_bit(EP_DISC_ABORT, &ep->com.history); close_complete_upcall(ep); ret = send_abort(ep, NULL, gfp); - } else + } else { + set_bit(EP_DISC_CLOSE, &ep->com.history); ret = send_halfclose(ep, gfp); + } if (ret) fatal = 1; } @@ -2494,10 +2750,323 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) return ret; } -static int async_event(struct c4iw_dev *dev, struct sk_buff *skb) +static void active_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb, + struct cpl_fw6_msg_ofld_connection_wr_rpl *req) +{ + struct c4iw_ep *ep; + int atid = be32_to_cpu(req->tid); + + ep = (struct c4iw_ep *)lookup_atid(dev->rdev.lldi.tids, req->tid); + if (!ep) + return; + + switch (req->retval) { + case FW_ENOMEM: + 
set_bit(ACT_RETRY_NOMEM, &ep->com.history);
+		if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) {
+			send_fw_act_open_req(ep, atid);
+			return;
+		}
+	case FW_EADDRINUSE:
+		set_bit(ACT_RETRY_INUSE, &ep->com.history);
+		if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) {
+			send_fw_act_open_req(ep, atid);
+			return;
+		}
+		break;
+	default:
+		pr_info("%s unexpected ofld conn wr retval %d\n",
+			__func__, req->retval);
+		break;
+	}
+	pr_err("active ofld_connect_wr failure %d atid %d\n",
+	       req->retval, atid);
+	mutex_lock(&dev->rdev.stats.lock);
+	dev->rdev.stats.act_ofld_conn_fails++;
+	mutex_unlock(&dev->rdev.stats.lock);
+	connect_reply_upcall(ep, status2errno(req->retval));
+	state_set(&ep->com, DEAD);
+	remove_handle(dev, &dev->atid_idr, atid);
+	cxgb4_free_atid(dev->rdev.lldi.tids, atid);
+	dst_release(ep->dst);
+	cxgb4_l2t_release(ep->l2t);
+	c4iw_put_ep(&ep->com);
+}
+
+static void passive_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb,
+			struct cpl_fw6_msg_ofld_connection_wr_rpl *req)
+{
+	struct sk_buff *rpl_skb;
+	struct cpl_pass_accept_req *cpl;
+	int ret;
+
+	rpl_skb = (struct sk_buff *)cpu_to_be64(req->cookie);
+	BUG_ON(!rpl_skb);
+	if (req->retval) {
+		PDBG("%s passive open failure %d\n", __func__, req->retval);
+		mutex_lock(&dev->rdev.stats.lock);
+		dev->rdev.stats.pas_ofld_conn_fails++;
+		mutex_unlock(&dev->rdev.stats.lock);
+		kfree_skb(rpl_skb);
+	} else {
+		cpl = (struct cpl_pass_accept_req *)cplhdr(rpl_skb);
+		OPCODE_TID(cpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_REQ,
+					htonl(req->tid)));
+		ret = pass_accept_req(dev, rpl_skb);
+		if (!ret)
+			kfree_skb(rpl_skb);
+	}
+	return;
+}
+
+static int deferred_fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
+{
+	struct cpl_fw6_msg *rpl = cplhdr(skb);
+	struct cpl_fw6_msg_ofld_connection_wr_rpl *req;
+
+	switch (rpl->type) {
+	case FW6_TYPE_CQE:
+		c4iw_ev_dispatch(dev, (struct t4_cqe *)&rpl->data[0]);
+		break;
+	case FW6_TYPE_OFLD_CONNECTION_WR_RPL:
+		req = (struct cpl_fw6_msg_ofld_connection_wr_rpl *)rpl->data;
+		switch (req->t_state) {
+		case TCP_SYN_SENT:
+			active_ofld_conn_reply(dev, skb, req);
+			break;
+		case TCP_SYN_RECV:
+			passive_ofld_conn_reply(dev, skb, req);
+			break;
+		default:
+			pr_err("%s unexpected ofld conn wr state %d\n",
+				__func__, req->t_state);
+			break;
+		}
+		break;
+	}
+	return 0;
+}
+
+static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid, u8 tos)
+{
+	u32 l2info;
+	u16 vlantag, len, hdr_len;
+	u8 intf;
+	struct cpl_rx_pkt *cpl = cplhdr(skb);
+	struct cpl_pass_accept_req *req;
+	struct tcp_options_received tmp_opt;
+
+	/* Store values from cpl_rx_pkt in temporary location. */
+	vlantag = cpl->vlan;
+	len = cpl->len;
+	l2info = cpl->l2info;
+	hdr_len = cpl->hdr_len;
+	intf = cpl->iff;
+
+	__skb_pull(skb, sizeof(*req) + sizeof(struct rss_header));
+
+	/*
+	 * We need to parse the TCP options from the SYN packet
+	 * to generate the cpl_pass_accept_req.
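+	 * tcp_parse_options() fills tmp_opt with the peer's MSS, window
+	 * scale, timestamp and SACK options; those values are copied into
+	 * req->tcpopt below.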
+	 */
+	memset(&tmp_opt, 0, sizeof(tmp_opt));
+	tcp_clear_options(&tmp_opt);
+	tcp_parse_options(skb, &tmp_opt, 0, 0, NULL);
+
+	req = (struct cpl_pass_accept_req *)__skb_push(skb, sizeof(*req));
+	memset(req, 0, sizeof(*req));
+	req->l2info = cpu_to_be16(V_SYN_INTF(intf) |
+			 V_SYN_MAC_IDX(G_RX_MACIDX(htonl(l2info))) |
+			 F_SYN_XACT_MATCH);
+	req->hdr_len = cpu_to_be32(V_SYN_RX_CHAN(G_RX_CHAN(htonl(l2info))) |
+			V_TCP_HDR_LEN(G_RX_TCPHDR_LEN(htons(hdr_len))) |
+			V_IP_HDR_LEN(G_RX_IPHDR_LEN(htons(hdr_len))) |
+			V_ETH_HDR_LEN(G_RX_ETHHDR_LEN(htonl(l2info))));
+	req->vlan = vlantag;
+	req->len = len;
+	req->tos_stid = cpu_to_be32(PASS_OPEN_TID(stid) |
+				    PASS_OPEN_TOS(tos));
+	req->tcpopt.mss = htons(tmp_opt.mss_clamp);
+	if (tmp_opt.wscale_ok)
+		req->tcpopt.wsf = tmp_opt.snd_wscale;
+	req->tcpopt.tstamp = tmp_opt.saw_tstamp;
+	if (tmp_opt.sack_ok)
+		req->tcpopt.sack = 1;
+	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_REQ, 0));
+	return;
+}
+
+static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb,
+				  __be32 laddr, __be16 lport,
+				  __be32 raddr, __be16 rport,
+				  u32 rcv_isn, u32 filter, u16 window,
+				  u32 rss_qid, u8 port_id)
+{
+	struct sk_buff *req_skb;
+	struct fw_ofld_connection_wr *req;
+	struct cpl_pass_accept_req *cpl = cplhdr(skb);
+
+	req_skb = alloc_skb(sizeof(struct fw_ofld_connection_wr), GFP_KERNEL);
+	req = (struct fw_ofld_connection_wr *)__skb_put(req_skb, sizeof(*req));
+	memset(req, 0, sizeof(*req));
+	req->op_compl = htonl(V_WR_OP(FW_OFLD_CONNECTION_WR) | FW_WR_COMPL(1));
+	req->len16_pkd = htonl(FW_WR_LEN16(DIV_ROUND_UP(sizeof(*req), 16)));
+	req->le.version_cpl = htonl(F_FW_OFLD_CONNECTION_WR_CPL);
+	req->le.filter = filter;
+	req->le.lport = lport;
+	req->le.pport = rport;
+	req->le.u.ipv4.lip = laddr;
+	req->le.u.ipv4.pip = raddr;
+	req->tcb.rcv_nxt = htonl(rcv_isn + 1);
+	req->tcb.rcv_adv = htons(window);
+	req->tcb.t_state_to_astid =
+		htonl(V_FW_OFLD_CONNECTION_WR_T_STATE(TCP_SYN_RECV) |
+			V_FW_OFLD_CONNECTION_WR_RCV_SCALE(cpl->tcpopt.wsf) |
+			V_FW_OFLD_CONNECTION_WR_ASTID(
+			GET_PASS_OPEN_TID(ntohl(cpl->tos_stid))));
+
+	/*
+	 * We store the qid in opt2, which will be used by the firmware
+	 * to send us the wr response.
+	 */
+	req->tcb.opt2 = htonl(V_RSS_QUEUE(rss_qid));
+
+	/*
+	 * We initialize the MSS index in the TCB to 0xF, so that when the
+	 * driver sends cpl_pass_accept_rpl the TCB picks up the correct
+	 * value. If this were 0, TP would ignore any value > 0 for the
+	 * MSS index.
+	 */
+	req->tcb.opt0 = cpu_to_be64(V_MSS_IDX(0xF));
+	req->cookie = cpu_to_be64((u64)skb);
+
+	set_wr_txq(req_skb, CPL_PRIORITY_CONTROL, port_id);
+	cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb);
+}
+
+/*
+ * Handler for CPL_RX_PKT messages. Needed when a filter, rather than a
+ * server entry, is used to redirect a SYN packet. When packets hit the
+ * filter they are redirected to the offload queue and the driver tries
+ * to establish the connection using a firmware work request.
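+ * The SYN is parsed here, a matching cpl_pass_accept_req is synthesized
+ * from it (build_cpl_pass_accept_req()) and a fw_ofld_connection_wr is
+ * sent to the firmware; the reply is handled by passive_ofld_conn_reply().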
+ */
+static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
+{
+	int stid;
+	unsigned int filter;
+	struct ethhdr *eh = NULL;
+	struct vlan_ethhdr *vlan_eh = NULL;
+	struct iphdr *iph;
+	struct tcphdr *tcph;
+	struct rss_header *rss = (void *)skb->data;
+	struct cpl_rx_pkt *cpl = (void *)skb->data;
+	struct cpl_pass_accept_req *req = (void *)(rss + 1);
+	struct l2t_entry *e;
+	struct dst_entry *dst;
+	struct rtable *rt;
+	struct c4iw_ep *lep;
+	u16 window;
+	struct port_info *pi;
+	struct net_device *pdev;
+	u16 rss_qid;
+	int step;
+	u32 tx_chan;
+	struct neighbour *neigh;
+
+	/* Drop all non-SYN packets */
+	if (!(cpl->l2info & cpu_to_be32(F_RXF_SYN)))
+		goto reject;
+
+	/*
+	 * Drop all packets which did not hit the filter.
+	 * Unlikely to happen.
+	 */
+	if (!(rss->filter_hit && rss->filter_tid))
+		goto reject;
+
+	/*
+	 * Calculate the server tid from filter hit index from cpl_rx_pkt.
+	 */
+	stid = cpu_to_be32(rss->hash_val) - dev->rdev.lldi.tids->sftid_base
+	       + dev->rdev.lldi.tids->nstids;
+
+	lep = (struct c4iw_ep *)lookup_stid(dev->rdev.lldi.tids, stid);
+	if (!lep) {
+		PDBG("%s connect request on invalid stid %d\n", __func__, stid);
+		goto reject;
+	}
+
+	if (G_RX_ETHHDR_LEN(ntohl(cpl->l2info)) == ETH_HLEN) {
+		eh = (struct ethhdr *)(req + 1);
+		iph = (struct iphdr *)(eh + 1);
+	} else {
+		vlan_eh = (struct vlan_ethhdr *)(req + 1);
+		iph = (struct iphdr *)(vlan_eh + 1);
+		skb->vlan_tci = ntohs(cpl->vlan);
+	}
+
+	if (iph->version != 0x4)
+		goto reject;
+
+	tcph = (struct tcphdr *)(iph + 1);
+	skb_set_network_header(skb, (void *)iph - (void *)rss);
+	skb_set_transport_header(skb, (void *)tcph - (void *)rss);
+	skb_get(skb);
+
+	PDBG("%s lip 0x%x lport %u pip 0x%x pport %u tos %d\n", __func__,
+	     ntohl(iph->daddr), ntohs(tcph->dest), ntohl(iph->saddr),
+	     ntohs(tcph->source), iph->tos);
+
+	rt = find_route(dev, iph->daddr, iph->saddr, tcph->dest, tcph->source,
+			iph->tos);
+	if (!rt) {
+		pr_err("%s - failed to find dst entry!\n",
+		       __func__);
+		goto reject;
+	}
+	dst = &rt->dst;
+	neigh = dst_neigh_lookup_skb(dst, skb);
+
+	if (neigh->dev->flags & IFF_LOOPBACK) {
+		pdev = ip_dev_find(&init_net, iph->daddr);
+		e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh,
+				  pdev, 0);
+		pi = (struct port_info *)netdev_priv(pdev);
+		tx_chan = cxgb4_port_chan(pdev);
+		dev_put(pdev);
+	} else {
+		e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh,
+				  neigh->dev, 0);
+		pi = (struct port_info *)netdev_priv(neigh->dev);
+		tx_chan = cxgb4_port_chan(neigh->dev);
+	}
+	if (!e) {
+		pr_err("%s - failed to allocate l2t entry!\n",
+		       __func__);
+		goto free_dst;
+	}
+
+	step = dev->rdev.lldi.nrxq / dev->rdev.lldi.nchan;
+	rss_qid = dev->rdev.lldi.rxq_ids[pi->port_id * step];
+	window = htons(tcph->window);
+
+	/* Calculate filter portion for LE region. */
+	filter = cpu_to_be32(select_ntuple(dev, dst, e));
+
+	/*
+	 * Synthesize the cpl_pass_accept_req. We have everything except the
+	 * TID. Once firmware sends a reply with TID we update the TID field
+	 * in cpl and pass it through the regular cpl_pass_accept_req path.
+ */ + build_cpl_pass_accept_req(skb, stid, iph->tos); + send_fw_pass_open_req(dev, skb, iph->daddr, tcph->dest, iph->saddr, + tcph->source, ntohl(tcph->seq), filter, window, + rss_qid, pi->port_id); + cxgb4_l2t_release(e); +free_dst: + dst_release(dst); +reject: return 0; } @@ -2520,7 +3089,8 @@ static c4iw_handler_func work_handlers[NUM_CPL_CMDS] = { [CPL_CLOSE_CON_RPL] = close_con_rpl, [CPL_RDMA_TERMINATE] = terminate, [CPL_FW4_ACK] = fw4_ack, - [CPL_FW6_MSG] = async_event + [CPL_FW6_MSG] = deferred_fw6_msg, + [CPL_RX_PKT] = rx_pkt }; static void process_timeout(struct c4iw_ep *ep) @@ -2531,6 +3101,7 @@ static void process_timeout(struct c4iw_ep *ep) mutex_lock(&ep->com.mutex); PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid, ep->com.state); + set_bit(TIMEDOUT, &ep->com.history); switch (ep->com.state) { case MPA_REQ_SENT: __state_set(&ep->com, ABORTING); @@ -2651,7 +3222,7 @@ static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb) PDBG("%s type %u\n", __func__, rpl->type); switch (rpl->type) { - case 1: + case FW6_TYPE_WR_RPL: ret = (int)((be64_to_cpu(rpl->data[0]) >> 8) & 0xff); wr_waitp = (struct c4iw_wr_wait *)(__force unsigned long) rpl->data[1]; PDBG("%s wr_waitp %p ret %u\n", __func__, wr_waitp, ret); @@ -2659,7 +3230,8 @@ static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb) c4iw_wake_up(wr_waitp, ret ? -ret : 0); kfree_skb(skb); break; - case 2: + case FW6_TYPE_CQE: + case FW6_TYPE_OFLD_CONNECTION_WR_RPL: sched(dev, skb); break; default: @@ -2722,7 +3294,8 @@ c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS] = { [CPL_RDMA_TERMINATE] = sched, [CPL_FW4_ACK] = sched, [CPL_SET_TCB_RPL] = set_tcb_rpl, - [CPL_FW6_MSG] = fw6_msg + [CPL_FW6_MSG] = fw6_msg, + [CPL_RX_PKT] = sched }; int __init c4iw_cm_init(void) diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index cb4ecd783700..ba11c76c0b5a 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -279,6 +279,11 @@ static int stats_show(struct seq_file *seq, void *v) seq_printf(seq, " DB State: %s Transitions %llu\n", db_state_str[dev->db_state], dev->rdev.stats.db_state_transitions); + seq_printf(seq, "TCAM_FULL: %10llu\n", dev->rdev.stats.tcam_full); + seq_printf(seq, "ACT_OFLD_CONN_FAILS: %10llu\n", + dev->rdev.stats.act_ofld_conn_fails); + seq_printf(seq, "PAS_OFLD_CONN_FAILS: %10llu\n", + dev->rdev.stats.pas_ofld_conn_fails); return 0; } @@ -309,6 +314,9 @@ static ssize_t stats_clear(struct file *file, const char __user *buf, dev->rdev.stats.db_empty = 0; dev->rdev.stats.db_drop = 0; dev->rdev.stats.db_state_transitions = 0; + dev->rdev.stats.tcam_full = 0; + dev->rdev.stats.act_ofld_conn_fails = 0; + dev->rdev.stats.pas_ofld_conn_fails = 0; mutex_unlock(&dev->rdev.stats.lock); return count; } @@ -322,6 +330,113 @@ static const struct file_operations stats_debugfs_fops = { .write = stats_clear, }; +static int dump_ep(int id, void *p, void *data) +{ + struct c4iw_ep *ep = p; + struct c4iw_debugfs_data *epd = data; + int space; + int cc; + + space = epd->bufsize - epd->pos - 1; + if (space == 0) + return 1; + + cc = snprintf(epd->buf + epd->pos, space, + "ep %p cm_id %p qp %p state %d flags 0x%lx history 0x%lx " + "hwtid %d atid %d %pI4:%d <-> %pI4:%d\n", + ep, ep->com.cm_id, ep->com.qp, (int)ep->com.state, + ep->com.flags, ep->com.history, ep->hwtid, ep->atid, + &ep->com.local_addr.sin_addr.s_addr, + ntohs(ep->com.local_addr.sin_port), + &ep->com.remote_addr.sin_addr.s_addr, + ntohs(ep->com.remote_addr.sin_port)); + if (cc < space) + 
epd->pos += cc; + return 0; +} + +static int dump_listen_ep(int id, void *p, void *data) +{ + struct c4iw_listen_ep *ep = p; + struct c4iw_debugfs_data *epd = data; + int space; + int cc; + + space = epd->bufsize - epd->pos - 1; + if (space == 0) + return 1; + + cc = snprintf(epd->buf + epd->pos, space, + "ep %p cm_id %p state %d flags 0x%lx stid %d backlog %d " + "%pI4:%d\n", ep, ep->com.cm_id, (int)ep->com.state, + ep->com.flags, ep->stid, ep->backlog, + &ep->com.local_addr.sin_addr.s_addr, + ntohs(ep->com.local_addr.sin_port)); + if (cc < space) + epd->pos += cc; + return 0; +} + +static int ep_release(struct inode *inode, struct file *file) +{ + struct c4iw_debugfs_data *epd = file->private_data; + if (!epd) { + pr_info("%s null qpd?\n", __func__); + return 0; + } + vfree(epd->buf); + kfree(epd); + return 0; +} + +static int ep_open(struct inode *inode, struct file *file) +{ + struct c4iw_debugfs_data *epd; + int ret = 0; + int count = 1; + + epd = kmalloc(sizeof(*epd), GFP_KERNEL); + if (!epd) { + ret = -ENOMEM; + goto out; + } + epd->devp = inode->i_private; + epd->pos = 0; + + spin_lock_irq(&epd->devp->lock); + idr_for_each(&epd->devp->hwtid_idr, count_idrs, &count); + idr_for_each(&epd->devp->atid_idr, count_idrs, &count); + idr_for_each(&epd->devp->stid_idr, count_idrs, &count); + spin_unlock_irq(&epd->devp->lock); + + epd->bufsize = count * 160; + epd->buf = vmalloc(epd->bufsize); + if (!epd->buf) { + ret = -ENOMEM; + goto err1; + } + + spin_lock_irq(&epd->devp->lock); + idr_for_each(&epd->devp->hwtid_idr, dump_ep, epd); + idr_for_each(&epd->devp->atid_idr, dump_ep, epd); + idr_for_each(&epd->devp->stid_idr, dump_listen_ep, epd); + spin_unlock_irq(&epd->devp->lock); + + file->private_data = epd; + goto out; +err1: + kfree(epd); +out: + return ret; +} + +static const struct file_operations ep_debugfs_fops = { + .owner = THIS_MODULE, + .open = ep_open, + .release = ep_release, + .read = debugfs_read, +}; + static int setup_debugfs(struct c4iw_dev *devp) { struct dentry *de; @@ -344,6 +459,11 @@ static int setup_debugfs(struct c4iw_dev *devp) if (de && de->d_inode) de->d_inode->i_size = 4096; + de = debugfs_create_file("eps", S_IWUSR, devp->debugfs_root, + (void *)devp, &ep_debugfs_fops); + if (de && de->d_inode) + de->d_inode->i_size = 4096; + return 0; } @@ -475,6 +595,9 @@ static void c4iw_dealloc(struct uld_ctx *ctx) idr_destroy(&ctx->dev->cqidr); idr_destroy(&ctx->dev->qpidr); idr_destroy(&ctx->dev->mmidr); + idr_destroy(&ctx->dev->hwtid_idr); + idr_destroy(&ctx->dev->stid_idr); + idr_destroy(&ctx->dev->atid_idr); iounmap(ctx->dev->rdev.oc_mw_kva); ib_dealloc_device(&ctx->dev->ibdev); ctx->dev = NULL; @@ -532,6 +655,9 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) idr_init(&devp->cqidr); idr_init(&devp->qpidr); idr_init(&devp->mmidr); + idr_init(&devp->hwtid_idr); + idr_init(&devp->stid_idr); + idr_init(&devp->atid_idr); spin_lock_init(&devp->lock); mutex_init(&devp->rdev.stats.lock); mutex_init(&devp->db_mutex); @@ -577,14 +703,76 @@ out: return ctx; } +static inline struct sk_buff *copy_gl_to_skb_pkt(const struct pkt_gl *gl, + const __be64 *rsp, + u32 pktshift) +{ + struct sk_buff *skb; + + /* + * Allocate space for cpl_pass_accept_req which will be synthesized by + * driver. Once the driver synthesizes the request the skb will go + * through the regular cpl_pass_accept_req processing. + * The math here assumes sizeof cpl_pass_accept_req >= sizeof + * cpl_rx_pkt. 
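The eps debugfs file added above uses a two-pass shape: count the idr entries, size a buffer from the count, then walk again and format. Since devp->lock is dropped between the passes, the estimate is deliberately generous (count seeded at 1, 160 bytes per endpoint) and dump_ep()/dump_listen_ep() stop when the remaining space runs out rather than overrun. A stripped-down sketch with a hypothetical counting callback:

#include <linux/idr.h>
#include <linux/vmalloc.h>

static int demo_count_cb(int id, void *p, void *data)
{
	(*(int *)data)++;
	return 0;			/* keep iterating */
}

/* Pass 1: count.  Pass 2 (not shown) formats into the buffer, checking
 * the remaining space per entry since the count may be stale by then. */
static char *demo_snapshot_buf(struct idr *idr, int *entries)
{
	int count = 1;			/* seed so an empty idr still allocates */

	idr_for_each(idr, demo_count_cb, &count);
	*entries = count;
	return vmalloc(count * 160);
}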
+ */ + skb = alloc_skb(gl->tot_len + sizeof(struct cpl_pass_accept_req) + + sizeof(struct rss_header) - pktshift, GFP_ATOMIC); + if (unlikely(!skb)) + return NULL; + + __skb_put(skb, gl->tot_len + sizeof(struct cpl_pass_accept_req) + + sizeof(struct rss_header) - pktshift); + + /* + * This skb will contain: + * rss_header from the rspq descriptor (1 flit) + * cpl_rx_pkt struct from the rspq descriptor (2 flits) + * space for the difference between the size of an + * rx_pkt and pass_accept_req cpl (1 flit) + * the packet data from the gl + */ + skb_copy_to_linear_data(skb, rsp, sizeof(struct cpl_pass_accept_req) + + sizeof(struct rss_header)); + skb_copy_to_linear_data_offset(skb, sizeof(struct rss_header) + + sizeof(struct cpl_pass_accept_req), + gl->va + pktshift, + gl->tot_len - pktshift); + return skb; +} + +static inline int recv_rx_pkt(struct c4iw_dev *dev, const struct pkt_gl *gl, + const __be64 *rsp) +{ + unsigned int opcode = *(u8 *)rsp; + struct sk_buff *skb; + + if (opcode != CPL_RX_PKT) + goto out; + + skb = copy_gl_to_skb_pkt(gl , rsp, dev->rdev.lldi.sge_pktshift); + if (skb == NULL) + goto out; + + if (c4iw_handlers[opcode] == NULL) { + pr_info("%s no handler opcode 0x%x...\n", __func__, + opcode); + kfree_skb(skb); + goto out; + } + c4iw_handlers[opcode](dev, skb); + return 1; +out: + return 0; +} + static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp, const struct pkt_gl *gl) { struct uld_ctx *ctx = handle; struct c4iw_dev *dev = ctx->dev; struct sk_buff *skb; - const struct cpl_act_establish *rpl; - unsigned int opcode; + u8 opcode; if (gl == NULL) { /* omit RSS and rsp_ctrl at end of descriptor */ @@ -601,19 +789,29 @@ static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp, u32 qid = be32_to_cpu(rc->pldbuflen_qid); c4iw_ev_handler(dev, qid); return 0; + } else if (unlikely(*(u8 *)rsp != *(u8 *)gl->va)) { + if (recv_rx_pkt(dev, gl, rsp)) + return 0; + + pr_info("%s: unexpected FL contents at %p, " \ + "RSS %#llx, FL %#llx, len %u\n", + pci_name(ctx->lldi.pdev), gl->va, + (unsigned long long)be64_to_cpu(*rsp), + (unsigned long long)be64_to_cpu(*(u64 *)gl->va), + gl->tot_len); + + return 0; } else { skb = cxgb4_pktgl_to_skb(gl, 128, 128); if (unlikely(!skb)) goto nomem; } - rpl = cplhdr(skb); - opcode = rpl->ot.opcode; - + opcode = *(u8 *)rsp; if (c4iw_handlers[opcode]) c4iw_handlers[opcode](dev, skb); else - printk(KERN_INFO "%s no handler opcode 0x%x...\n", __func__, + pr_info("%s no handler opcode 0x%x...\n", __func__, opcode); return 0; diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 9beb3a9f0336..9c1644fb0259 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -130,6 +130,9 @@ struct c4iw_stats { u64 db_empty; u64 db_drop; u64 db_state_transitions; + u64 tcam_full; + u64 act_ofld_conn_fails; + u64 pas_ofld_conn_fails; }; struct c4iw_rdev { @@ -223,6 +226,9 @@ struct c4iw_dev { struct dentry *debugfs_root; enum db_state db_state; int qpcnt; + struct idr hwtid_idr; + struct idr atid_idr; + struct idr stid_idr; }; static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev) @@ -712,6 +718,31 @@ enum c4iw_ep_flags { CLOSE_SENT = 3, }; +enum c4iw_ep_history { + ACT_OPEN_REQ = 0, + ACT_OFLD_CONN = 1, + ACT_OPEN_RPL = 2, + ACT_ESTAB = 3, + PASS_ACCEPT_REQ = 4, + PASS_ESTAB = 5, + ABORT_UPCALL = 6, + ESTAB_UPCALL = 7, + CLOSE_UPCALL = 8, + ULP_ACCEPT = 9, + ULP_REJECT = 10, + TIMEDOUT = 11, + PEER_ABORT = 12, + PEER_CLOSE = 13, + CONNREQ_UPCALL = 14, + 
ABORT_CONN = 15, + DISCONN_UPCALL = 16, + EP_DISC_CLOSE = 17, + EP_DISC_ABORT = 18, + CONN_RPL_UPCALL = 19, + ACT_RETRY_NOMEM = 20, + ACT_RETRY_INUSE = 21 +}; + struct c4iw_ep_common { struct iw_cm_id *cm_id; struct c4iw_qp *qp; @@ -723,6 +754,7 @@ struct c4iw_ep_common { struct sockaddr_in remote_addr; struct c4iw_wr_wait wr_wait; unsigned long flags; + unsigned long history; }; struct c4iw_listen_ep { @@ -760,6 +792,7 @@ struct c4iw_ep { u8 tos; u8 retry_with_mpa_v1; u8 tried_with_mpa_v1; + unsigned int retry_count; }; static inline struct c4iw_ep *to_ep(struct iw_cm_id *cm_id) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 72ae63f0072d..03103d2bd641 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -752,6 +752,9 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_ dev->trans_start = jiffies; ++tx->tx_head; + skb_orphan(skb); + skb_dst_drop(skb); + if (++priv->tx_outstanding == ipoib_sendq_size) { ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n", tx->qp->qp_num); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index f10221f40803..a1bca70e20aa 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -615,8 +615,9 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, address->last_send = priv->tx_head; ++priv->tx_head; - skb_orphan(skb); + skb_orphan(skb); + skb_dst_drop(skb); } if (unlikely(priv->tx_outstanding > MAX_SEND_CQE)) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 55074cba20eb..c1c74e030a58 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -57,17 +57,9 @@ * physically contiguous memory regions it is mapping into page sizes * that we support. * - * Traditionally the IOMMU core just handed us the mappings directly, - * after making sure the size is an order of a 4KiB page and that the - * mapping has natural alignment. - * - * To retain this behavior, we currently advertise that we support - * all page sizes that are an order of 4KiB. - * - * If at some point we'd like to utilize the IOMMU core's new behavior, - * we could change this to advertise the real page sizes we support. 
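The history word added to c4iw_ep_common above is a plain bitmask indexed by c4iw_ep_history: one atomic set_bit() per connection event, and the whole life story of an endpoint prints as the single "history 0x%lx" field in the eps dump. The idiom in isolation, with a cut-down hypothetical enum:

#include <linux/bitops.h>

enum demo_history { DEMO_OPEN = 0, DEMO_RETRY = 1, DEMO_TIMEDOUT = 2 };

/* set_bit() is atomic, so events can be recorded from any context. */
static void demo_mark(unsigned long *history, enum demo_history ev)
{
	set_bit(ev, history);
}

/* Decoding a dumped history word: */
static bool demo_saw(unsigned long history, enum demo_history ev)
{
	return test_bit(ev, &history);
}

All 22 events fit in a single unsigned long, so no extra storage or locking is needed.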
+ * 512GB Pages are not supported due to a hardware bug */ -#define AMD_IOMMU_PGSIZES (~0xFFFUL) +#define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38)) static DEFINE_RWLOCK(amd_iommu_devtable_lock); @@ -140,6 +132,9 @@ static void free_dev_data(struct iommu_dev_data *dev_data) list_del(&dev_data->dev_data_list); spin_unlock_irqrestore(&dev_data_list_lock, flags); + if (dev_data->group) + iommu_group_put(dev_data->group); + kfree(dev_data); } @@ -274,41 +269,23 @@ static void swap_pci_ref(struct pci_dev **from, struct pci_dev *to) *from = to; } -#define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF) - -static int iommu_init_device(struct device *dev) +static struct pci_bus *find_hosted_bus(struct pci_bus *bus) { - struct pci_dev *dma_pdev = NULL, *pdev = to_pci_dev(dev); - struct iommu_dev_data *dev_data; - struct iommu_group *group; - u16 alias; - int ret; - - if (dev->archdata.iommu) - return 0; - - dev_data = find_dev_data(get_device_id(dev)); - if (!dev_data) - return -ENOMEM; - - alias = amd_iommu_alias_table[dev_data->devid]; - if (alias != dev_data->devid) { - struct iommu_dev_data *alias_data; + while (!bus->self) { + if (!pci_is_root_bus(bus)) + bus = bus->parent; + else + return ERR_PTR(-ENODEV); + } - alias_data = find_dev_data(alias); - if (alias_data == NULL) { - pr_err("AMD-Vi: Warning: Unhandled device %s\n", - dev_name(dev)); - free_dev_data(dev_data); - return -ENOTSUPP; - } - dev_data->alias_data = alias_data; + return bus; +} - dma_pdev = pci_get_bus_and_slot(alias >> 8, alias & 0xff); - } +#define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF) - if (dma_pdev == NULL) - dma_pdev = pci_dev_get(pdev); +static struct pci_dev *get_isolation_root(struct pci_dev *pdev) +{ + struct pci_dev *dma_pdev = pdev; /* Account for quirked devices */ swap_pci_ref(&dma_pdev, pci_get_dma_source(dma_pdev)); @@ -330,14 +307,9 @@ static int iommu_init_device(struct device *dev) * Finding the next device may require skipping virtual buses. 
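In the new AMD_IOMMU_PGSIZES, a set bit k advertises support for 2^k-byte pages: ~0xFFFUL keeps every order from 4KiB upward, and masking off 2ULL << 38 (which is bit 39) removes exactly the 512GB size. A standalone check of the arithmetic:

#include <stdio.h>

int main(void)
{
	unsigned long long pgsizes = (~0xFFFULL) & ~(2ULL << 38);

	printf("bit cleared: %#llx\n", 2ULL << 38);	/* 0x8000000000 = 1ULL << 39 */
	printf("4KiB   %d\n", !!(pgsizes & (1ULL << 12)));	/* 1 */
	printf("2MiB   %d\n", !!(pgsizes & (1ULL << 21)));	/* 1 */
	printf("1GiB   %d\n", !!(pgsizes & (1ULL << 30)));	/* 1 */
	printf("512GiB %d\n", !!(pgsizes & (1ULL << 39)));	/* 0 */
	return 0;
}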
 */
 	while (!pci_is_root_bus(dma_pdev->bus)) {
-		struct pci_bus *bus = dma_pdev->bus;
-
-		while (!bus->self) {
-			if (!pci_is_root_bus(bus))
-				bus = bus->parent;
-			else
-				goto root_bus;
-		}
+		struct pci_bus *bus = find_hosted_bus(dma_pdev->bus);
+		if (IS_ERR(bus))
+			break;
 
 		if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS))
 			break;
@@ -345,19 +317,137 @@
 		swap_pci_ref(&dma_pdev, pci_dev_get(bus->self));
 	}
 
-root_bus:
-	group = iommu_group_get(&dma_pdev->dev);
-	pci_dev_put(dma_pdev);
+	return dma_pdev;
+}
+
+static int use_pdev_iommu_group(struct pci_dev *pdev, struct device *dev)
+{
+	struct iommu_group *group = iommu_group_get(&pdev->dev);
+	int ret;
+
 	if (!group) {
 		group = iommu_group_alloc();
 		if (IS_ERR(group))
 			return PTR_ERR(group);
+
+		WARN_ON(&pdev->dev != dev);
 	}
 
 	ret = iommu_group_add_device(group, dev);
-	iommu_group_put(group);
+	return ret;
+}
+
+static int use_dev_data_iommu_group(struct iommu_dev_data *dev_data,
+				    struct device *dev)
+{
+	if (!dev_data->group) {
+		struct iommu_group *group = iommu_group_alloc();
+		if (IS_ERR(group))
+			return PTR_ERR(group);
+
+		dev_data->group = group;
+	}
+
+	return iommu_group_add_device(dev_data->group, dev);
+}
+
+static int init_iommu_group(struct device *dev)
+{
+	struct iommu_dev_data *dev_data;
+	struct iommu_group *group;
+	struct pci_dev *dma_pdev;
+	int ret;
+
+	group = iommu_group_get(dev);
+	if (group) {
+		iommu_group_put(group);
+		return 0;
+	}
+
+	dev_data = find_dev_data(get_device_id(dev));
+	if (!dev_data)
+		return -ENOMEM;
+
+	if (dev_data->alias_data) {
+		u16 alias;
+		struct pci_bus *bus;
+
+		if (dev_data->alias_data->group)
+			goto use_group;
+
+		/*
+		 * If the alias device exists, it's effectively just a first
+		 * level quirk for finding the DMA source.
+		 */
+		alias = amd_iommu_alias_table[dev_data->devid];
+		dma_pdev = pci_get_bus_and_slot(alias >> 8, alias & 0xff);
+		if (dma_pdev) {
+			dma_pdev = get_isolation_root(dma_pdev);
+			goto use_pdev;
+		}
+
+		/*
+		 * If the alias is virtual, try to find a parent device
+		 * and test whether the IOMMU group is actually rooted above
+		 * the alias. Be careful to also test the parent device if
+		 * we think the alias is the root of the group.
+ */ + bus = pci_find_bus(0, alias >> 8); + if (!bus) + goto use_group; + + bus = find_hosted_bus(bus); + if (IS_ERR(bus) || !bus->self) + goto use_group; + + dma_pdev = get_isolation_root(pci_dev_get(bus->self)); + if (dma_pdev != bus->self || (dma_pdev->multifunction && + !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS))) + goto use_pdev; + + pci_dev_put(dma_pdev); + goto use_group; + } + + dma_pdev = get_isolation_root(pci_dev_get(to_pci_dev(dev))); +use_pdev: + ret = use_pdev_iommu_group(dma_pdev, dev); + pci_dev_put(dma_pdev); + return ret; +use_group: + return use_dev_data_iommu_group(dev_data->alias_data, dev); +} + +static int iommu_init_device(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct iommu_dev_data *dev_data; + u16 alias; + int ret; + + if (dev->archdata.iommu) + return 0; + + dev_data = find_dev_data(get_device_id(dev)); + if (!dev_data) + return -ENOMEM; + + alias = amd_iommu_alias_table[dev_data->devid]; + if (alias != dev_data->devid) { + struct iommu_dev_data *alias_data; + + alias_data = find_dev_data(alias); + if (alias_data == NULL) { + pr_err("AMD-Vi: Warning: Unhandled device %s\n", + dev_name(dev)); + free_dev_data(dev_data); + return -ENOTSUPP; + } + dev_data->alias_data = alias_data; + } + ret = init_iommu_group(dev); if (ret) return ret; diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index c9aa3d079ff0..e38ab438bb34 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -426,6 +426,7 @@ struct iommu_dev_data { struct iommu_dev_data *alias_data;/* The alias dev_data */ struct protection_domain *domain; /* Domain the device is bound to */ atomic_t bind; /* Domain attach reference count */ + struct iommu_group *group; /* IOMMU group for virtual aliases */ u16 devid; /* PCI Device ID */ bool iommu_v2; /* Device can make use of IOMMUv2 */ bool passthrough; /* Default for device is pt_domain */ diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 0badfa48b32b..c2c07a4a7f21 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1827,10 +1827,17 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, if (!pte) return -ENOMEM; /* It is large page*/ - if (largepage_lvl > 1) + if (largepage_lvl > 1) { pteval |= DMA_PTE_LARGE_PAGE; - else + /* Ensure that old small page tables are removed to make room + for superpage, if they exist. */ + dma_pte_clear_range(domain, iov_pfn, + iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1); + dma_pte_free_pagetable(domain, iov_pfn, + iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1); + } else { pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE; + } } /* We don't need lock here, nobody else @@ -2320,8 +2327,39 @@ static int domain_add_dev_info(struct dmar_domain *domain, return 0; } +static bool device_has_rmrr(struct pci_dev *dev) +{ + struct dmar_rmrr_unit *rmrr; + int i; + + for_each_rmrr_units(rmrr) { + for (i = 0; i < rmrr->devices_cnt; i++) { + /* + * Return TRUE if this RMRR contains the device that + * is passed in. + */ + if (rmrr->devices[i] == dev) + return true; + } + } + return false; +} + static int iommu_should_identity_map(struct pci_dev *pdev, int startup) { + + /* + * We want to prevent any device associated with an RMRR from + * getting placed into the SI Domain. This is done because + * problems exist when devices are moved in and out of domains + * and their respective RMRR info is lost. 
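The __domain_mapping() hunk above clears and frees any small-page tables under a range before stamping in a large-page PTE, so stale 4KiB entries cannot shadow the new superpage. The width of that range comes from lvl_to_nr_pages(); a sketch of the arithmetic, assuming VT-d's usual 9 index bits per level:

/* A PTE at level lvl spans 2^((lvl - 1) * 9) base pages:
 * level 1 -> 1 page (4KiB), level 2 -> 512 (2MiB), level 3 -> 262144 (1GiB). */
static unsigned long demo_lvl_to_nr_pages(int lvl)
{
	return 1UL << ((lvl - 1) * 9);
}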
We exempt USB devices + * from this process due to their usage of RMRRs that are known + * to not be needed after BIOS hand-off to OS. + */ + if (device_has_rmrr(pdev) && + (pdev->class >> 8) != PCI_CLASS_SERIAL_USB) + return 0; + if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev)) return 1; diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index badc17c2bcb4..18108c1405e2 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -16,13 +16,13 @@ #include <linux/slab.h> #include <linux/interrupt.h> #include <linux/ioport.h> -#include <linux/clk.h> #include <linux/platform_device.h> #include <linux/iommu.h> #include <linux/omap-iommu.h> #include <linux/mutex.h> #include <linux/spinlock.h> #include <linux/io.h> +#include <linux/pm_runtime.h> #include <asm/cacheflush.h> @@ -143,31 +143,44 @@ EXPORT_SYMBOL_GPL(omap_iommu_arch_version); static int iommu_enable(struct omap_iommu *obj) { int err; + struct platform_device *pdev = to_platform_device(obj->dev); + struct iommu_platform_data *pdata = pdev->dev.platform_data; - if (!obj) + if (!obj || !pdata) return -EINVAL; if (!arch_iommu) return -ENODEV; - clk_enable(obj->clk); + if (pdata->deassert_reset) { + err = pdata->deassert_reset(pdev, pdata->reset_name); + if (err) { + dev_err(obj->dev, "deassert_reset failed: %d\n", err); + return err; + } + } + + pm_runtime_get_sync(obj->dev); err = arch_iommu->enable(obj); - clk_disable(obj->clk); return err; } static void iommu_disable(struct omap_iommu *obj) { - if (!obj) - return; + struct platform_device *pdev = to_platform_device(obj->dev); + struct iommu_platform_data *pdata = pdev->dev.platform_data; - clk_enable(obj->clk); + if (!obj || !pdata) + return; arch_iommu->disable(obj); - clk_disable(obj->clk); + pm_runtime_put_sync(obj->dev); + + if (pdata->assert_reset) + pdata->assert_reset(pdev, pdata->reset_name); } /* @@ -290,7 +303,7 @@ static int load_iotlb_entry(struct omap_iommu *obj, struct iotlb_entry *e) if (!obj || !obj->nr_tlb_entries || !e) return -EINVAL; - clk_enable(obj->clk); + pm_runtime_get_sync(obj->dev); iotlb_lock_get(obj, &l); if (l.base == obj->nr_tlb_entries) { @@ -320,7 +333,7 @@ static int load_iotlb_entry(struct omap_iommu *obj, struct iotlb_entry *e) cr = iotlb_alloc_cr(obj, e); if (IS_ERR(cr)) { - clk_disable(obj->clk); + pm_runtime_put_sync(obj->dev); return PTR_ERR(cr); } @@ -334,7 +347,7 @@ static int load_iotlb_entry(struct omap_iommu *obj, struct iotlb_entry *e) l.vict = l.base; iotlb_lock_set(obj, &l); out: - clk_disable(obj->clk); + pm_runtime_put_sync(obj->dev); return err; } @@ -364,7 +377,7 @@ static void flush_iotlb_page(struct omap_iommu *obj, u32 da) int i; struct cr_regs cr; - clk_enable(obj->clk); + pm_runtime_get_sync(obj->dev); for_each_iotlb_cr(obj, obj->nr_tlb_entries, i, cr) { u32 start; @@ -383,7 +396,7 @@ static void flush_iotlb_page(struct omap_iommu *obj, u32 da) iommu_write_reg(obj, 1, MMU_FLUSH_ENTRY); } } - clk_disable(obj->clk); + pm_runtime_put_sync(obj->dev); if (i == obj->nr_tlb_entries) dev_dbg(obj->dev, "%s: no page for %08x\n", __func__, da); @@ -397,7 +410,7 @@ static void flush_iotlb_all(struct omap_iommu *obj) { struct iotlb_lock l; - clk_enable(obj->clk); + pm_runtime_get_sync(obj->dev); l.base = 0; l.vict = 0; @@ -405,7 +418,7 @@ static void flush_iotlb_all(struct omap_iommu *obj) iommu_write_reg(obj, 1, MMU_GFLUSH); - clk_disable(obj->clk); + pm_runtime_put_sync(obj->dev); } #if defined(CONFIG_OMAP_IOMMU_DEBUG) || defined(CONFIG_OMAP_IOMMU_DEBUG_MODULE) @@ -415,11 +428,11 @@ 
ssize_t omap_iommu_dump_ctx(struct omap_iommu *obj, char *buf, ssize_t bytes) if (!obj || !buf) return -EINVAL; - clk_enable(obj->clk); + pm_runtime_get_sync(obj->dev); bytes = arch_iommu->dump_ctx(obj, buf, bytes); - clk_disable(obj->clk); + pm_runtime_put_sync(obj->dev); return bytes; } @@ -433,7 +446,7 @@ __dump_tlb_entries(struct omap_iommu *obj, struct cr_regs *crs, int num) struct cr_regs tmp; struct cr_regs *p = crs; - clk_enable(obj->clk); + pm_runtime_get_sync(obj->dev); iotlb_lock_get(obj, &saved); for_each_iotlb_cr(obj, num, i, tmp) { @@ -443,7 +456,7 @@ __dump_tlb_entries(struct omap_iommu *obj, struct cr_regs *crs, int num) } iotlb_lock_set(obj, &saved); - clk_disable(obj->clk); + pm_runtime_put_sync(obj->dev); return p - crs; } @@ -807,9 +820,7 @@ static irqreturn_t iommu_fault_handler(int irq, void *data) if (!obj->refcount) return IRQ_NONE; - clk_enable(obj->clk); errs = iommu_report_fault(obj, &da); - clk_disable(obj->clk); if (errs == 0) return IRQ_HANDLED; @@ -931,17 +942,10 @@ static int __devinit omap_iommu_probe(struct platform_device *pdev) struct resource *res; struct iommu_platform_data *pdata = pdev->dev.platform_data; - if (pdev->num_resources != 2) - return -EINVAL; - obj = kzalloc(sizeof(*obj) + MMU_REG_SIZE, GFP_KERNEL); if (!obj) return -ENOMEM; - obj->clk = clk_get(&pdev->dev, pdata->clk_name); - if (IS_ERR(obj->clk)) - goto err_clk; - obj->nr_tlb_entries = pdata->nr_tlb_entries; obj->name = pdata->name; obj->dev = &pdev->dev; @@ -984,6 +988,9 @@ static int __devinit omap_iommu_probe(struct platform_device *pdev) goto err_irq; platform_set_drvdata(pdev, obj); + pm_runtime_irq_safe(obj->dev); + pm_runtime_enable(obj->dev); + dev_info(&pdev->dev, "%s registered\n", obj->name); return 0; @@ -992,8 +999,6 @@ err_irq: err_ioremap: release_mem_region(res->start, resource_size(res)); err_mem: - clk_put(obj->clk); -err_clk: kfree(obj); return err; } @@ -1014,7 +1019,8 @@ static int __devexit omap_iommu_remove(struct platform_device *pdev) release_mem_region(res->start, resource_size(res)); iounmap(obj->regbase); - clk_put(obj->clk); + pm_runtime_disable(obj->dev); + dev_info(&pdev->dev, "%s removed\n", obj->name); kfree(obj); return 0; diff --git a/drivers/iommu/omap-iommu.h b/drivers/iommu/omap-iommu.h index 2b5f3c04d167..120084206602 100644 --- a/drivers/iommu/omap-iommu.h +++ b/drivers/iommu/omap-iommu.h @@ -29,7 +29,6 @@ struct iotlb_entry { struct omap_iommu { const char *name; struct module *owner; - struct clk *clk; void __iomem *regbase; struct device *dev; void *isr_priv; @@ -116,8 +115,6 @@ static inline struct omap_iommu *dev_to_omap_iommu(struct device *dev) * MMU Register offsets */ #define MMU_REVISION 0x00 -#define MMU_SYSCONFIG 0x10 -#define MMU_SYSSTATUS 0x14 #define MMU_IRQSTATUS 0x18 #define MMU_IRQENABLE 0x1c #define MMU_WALKING_ST 0x40 diff --git a/drivers/iommu/omap-iommu2.c b/drivers/iommu/omap-iommu2.c index c02020292377..d745094a69dd 100644 --- a/drivers/iommu/omap-iommu2.c +++ b/drivers/iommu/omap-iommu2.c @@ -28,19 +28,6 @@ */ #define IOMMU_ARCH_VERSION 0x00000011 -/* SYSCONF */ -#define MMU_SYS_IDLE_SHIFT 3 -#define MMU_SYS_IDLE_FORCE (0 << MMU_SYS_IDLE_SHIFT) -#define MMU_SYS_IDLE_NONE (1 << MMU_SYS_IDLE_SHIFT) -#define MMU_SYS_IDLE_SMART (2 << MMU_SYS_IDLE_SHIFT) -#define MMU_SYS_IDLE_MASK (3 << MMU_SYS_IDLE_SHIFT) - -#define MMU_SYS_SOFTRESET (1 << 1) -#define MMU_SYS_AUTOIDLE 1 - -/* SYSSTATUS */ -#define MMU_SYS_RESETDONE 1 - /* IRQSTATUS & IRQENABLE */ #define MMU_IRQ_MULTIHITFAULT (1 << 4) #define MMU_IRQ_TABLEWALKFAULT (1 << 3) @@ 
-97,7 +84,6 @@ static void __iommu_set_twl(struct omap_iommu *obj, bool on) static int omap2_iommu_enable(struct omap_iommu *obj) { u32 l, pa; - unsigned long timeout; if (!obj->iopgd || !IS_ALIGNED((u32)obj->iopgd, SZ_16K)) return -EINVAL; @@ -106,29 +92,10 @@ static int omap2_iommu_enable(struct omap_iommu *obj) if (!IS_ALIGNED(pa, SZ_16K)) return -EINVAL; - iommu_write_reg(obj, MMU_SYS_SOFTRESET, MMU_SYSCONFIG); - - timeout = jiffies + msecs_to_jiffies(20); - do { - l = iommu_read_reg(obj, MMU_SYSSTATUS); - if (l & MMU_SYS_RESETDONE) - break; - } while (!time_after(jiffies, timeout)); - - if (!(l & MMU_SYS_RESETDONE)) { - dev_err(obj->dev, "can't take mmu out of reset\n"); - return -ENODEV; - } - l = iommu_read_reg(obj, MMU_REVISION); dev_info(obj->dev, "%s: version %d.%d\n", obj->name, (l >> 4) & 0xf, l & 0xf); - l = iommu_read_reg(obj, MMU_SYSCONFIG); - l &= ~MMU_SYS_IDLE_MASK; - l |= (MMU_SYS_IDLE_SMART | MMU_SYS_AUTOIDLE); - iommu_write_reg(obj, l, MMU_SYSCONFIG); - iommu_write_reg(obj, pa, MMU_TTB); __iommu_set_twl(obj, true); @@ -142,7 +109,6 @@ static void omap2_iommu_disable(struct omap_iommu *obj) l &= ~MMU_CNTL_MASK; iommu_write_reg(obj, l, MMU_CNTL); - iommu_write_reg(obj, MMU_SYS_IDLE_FORCE, MMU_SYSCONFIG); dev_dbg(obj->dev, "%s is shutting down\n", obj->name); } @@ -271,8 +237,6 @@ omap2_iommu_dump_ctx(struct omap_iommu *obj, char *buf, ssize_t len) char *p = buf; pr_reg(REVISION); - pr_reg(SYSCONFIG); - pr_reg(SYSSTATUS); pr_reg(IRQSTATUS); pr_reg(IRQENABLE); pr_reg(WALKING_ST); diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index c16e8fc8a4bd..4c9db62814ff 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -398,6 +398,7 @@ static int tegra_gart_probe(struct platform_device *pdev) do_gart_setup(gart, NULL); gart_handle = gart; + bus_set_iommu(&platform_bus_type, &gart_iommu_ops); return 0; fail: @@ -450,7 +451,6 @@ static struct platform_driver tegra_gart_driver = { static int __devinit tegra_gart_init(void) { - bus_set_iommu(&platform_bus_type, &gart_iommu_ops); return platform_driver_register(&tegra_gart_driver); } diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 4252d743963d..25c1210c0832 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -694,10 +694,8 @@ static void __smmu_iommu_unmap(struct smmu_as *as, dma_addr_t iova) *pte = _PTE_VACANT(iova); FLUSH_CPU_DCACHE(pte, page, sizeof(*pte)); flush_ptc_and_tlb(as->smmu, as, iova, pte, page, 0); - if (!--(*count)) { + if (!--(*count)) free_ptbl(as, iova); - smmu_flush_regs(as->smmu, 0); - } } static void __smmu_iommu_map_pfn(struct smmu_as *as, dma_addr_t iova, @@ -1232,6 +1230,7 @@ static int tegra_smmu_probe(struct platform_device *pdev) smmu_debugfs_create(smmu); smmu_handle = smmu; + bus_set_iommu(&platform_bus_type, &smmu_iommu_ops); return 0; } @@ -1276,7 +1275,6 @@ static struct platform_driver tegra_smmu_driver = { static int __devinit tegra_smmu_init(void) { - bus_set_iommu(&platform_bus_type, &smmu_iommu_ops); return platform_driver_register(&tegra_smmu_driver); } diff --git a/drivers/isdn/mISDN/dsp_core.c b/drivers/isdn/mISDN/dsp_core.c index 28c99c623bcd..22b720ec80cb 100644 --- a/drivers/isdn/mISDN/dsp_core.c +++ b/drivers/isdn/mISDN/dsp_core.c @@ -1217,8 +1217,7 @@ static void __exit dsp_cleanup(void) { mISDN_unregister_Bprotocol(&DSP); - if (timer_pending(&dsp_spl_tl)) - del_timer(&dsp_spl_tl); + del_timer_sync(&dsp_spl_tl); if (!list_empty(&dsp_ilist)) { printk(KERN_ERR "mISDN_dsp: Audio DSP object inst list 
not " diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index b5fdcb78a75b..a5ebc0083d87 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c @@ -225,7 +225,7 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user) * eventfd (ie. the appropriate virtqueue thread)? */ if (!send_notify_to_eventfd(cpu)) { - /* OK, we tell the main Laucher. */ + /* OK, we tell the main Launcher. */ if (put_user(cpu->pending_notify, user)) return -EFAULT; return sizeof(cpu->pending_notify); diff --git a/drivers/md/dm-bio-prison.c b/drivers/md/dm-bio-prison.c index e4e841567459..aefb78e3cbf9 100644 --- a/drivers/md/dm-bio-prison.c +++ b/drivers/md/dm-bio-prison.c @@ -208,31 +208,6 @@ void dm_cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios) EXPORT_SYMBOL_GPL(dm_cell_release); /* - * There are a couple of places where we put a bio into a cell briefly - * before taking it out again. In these situations we know that no other - * bio may be in the cell. This function releases the cell, and also does - * a sanity check. - */ -static void __cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio) -{ - BUG_ON(cell->holder != bio); - BUG_ON(!bio_list_empty(&cell->bios)); - - __cell_release(cell, NULL); -} - -void dm_cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio) -{ - unsigned long flags; - struct dm_bio_prison *prison = cell->prison; - - spin_lock_irqsave(&prison->lock, flags); - __cell_release_singleton(cell, bio); - spin_unlock_irqrestore(&prison->lock, flags); -} -EXPORT_SYMBOL_GPL(dm_cell_release_singleton); - -/* * Sometimes we don't want the holder, just the additional bios. */ static void __cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list *inmates) diff --git a/drivers/md/dm-bio-prison.h b/drivers/md/dm-bio-prison.h index 4e0ac376700a..53d1a7a84e2f 100644 --- a/drivers/md/dm-bio-prison.h +++ b/drivers/md/dm-bio-prison.h @@ -44,7 +44,6 @@ int dm_bio_detain(struct dm_bio_prison *prison, struct dm_cell_key *key, struct bio *inmate, struct dm_bio_prison_cell **ref); void dm_cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios); -void dm_cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio); // FIXME: bio arg not needed void dm_cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list *inmates); void dm_cell_error(struct dm_bio_prison_cell *cell); diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index bbf459bca61d..f7369f9d8595 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1689,8 +1689,7 @@ bad: return ret; } -static int crypt_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int crypt_map(struct dm_target *ti, struct bio *bio) { struct dm_crypt_io *io; struct crypt_config *cc = ti->private; @@ -1846,7 +1845,7 @@ static int crypt_iterate_devices(struct dm_target *ti, static struct target_type crypt_target = { .name = "crypt", - .version = {1, 11, 0}, + .version = {1, 12, 0}, .module = THIS_MODULE, .ctr = crypt_ctr, .dtr = crypt_dtr, diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index f53846f9ab50..cc1bd048acb2 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -274,8 +274,7 @@ static void delay_resume(struct dm_target *ti) atomic_set(&dc->may_delay, 1); } -static int delay_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int delay_map(struct dm_target *ti, struct bio *bio) { struct delay_c *dc = ti->private; @@ -338,7 +337,7 @@ out: static 
struct target_type delay_target = { .name = "delay", - .version = {1, 1, 0}, + .version = {1, 2, 0}, .module = THIS_MODULE, .ctr = delay_ctr, .dtr = delay_dtr, diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index cc15543a6ad7..9721f2ffb1a2 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -39,6 +39,10 @@ enum feature_flag_bits { DROP_WRITES }; +struct per_bio_data { + bool bio_submitted; +}; + static int parse_features(struct dm_arg_set *as, struct flakey_c *fc, struct dm_target *ti) { @@ -214,6 +218,7 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->num_flush_requests = 1; ti->num_discard_requests = 1; + ti->per_bio_data_size = sizeof(struct per_bio_data); ti->private = fc; return 0; @@ -265,11 +270,12 @@ static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc) } } -static int flakey_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int flakey_map(struct dm_target *ti, struct bio *bio) { struct flakey_c *fc = ti->private; unsigned elapsed; + struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data)); + pb->bio_submitted = false; /* Are we alive ? */ elapsed = (jiffies - fc->start_time) / HZ; @@ -277,7 +283,7 @@ static int flakey_map(struct dm_target *ti, struct bio *bio, /* * Flag this bio as submitted while down. */ - map_context->ll = 1; + pb->bio_submitted = true; /* * Map reads as normal. @@ -314,17 +320,16 @@ map_bio: return DM_MAPIO_REMAPPED; } -static int flakey_end_io(struct dm_target *ti, struct bio *bio, - int error, union map_info *map_context) +static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error) { struct flakey_c *fc = ti->private; - unsigned bio_submitted_while_down = map_context->ll; + struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data)); /* * Corrupt successful READs while in down state. * If flags were specified, only corrupt those that match. */ - if (fc->corrupt_bio_byte && !error && bio_submitted_while_down && + if (fc->corrupt_bio_byte && !error && pb->bio_submitted && (bio_data_dir(bio) == READ) && (fc->corrupt_bio_rw == READ) && all_corrupt_bio_flags_match(bio, fc)) corrupt_bio_data(bio, fc); @@ -406,7 +411,7 @@ static int flakey_iterate_devices(struct dm_target *ti, iterate_devices_callout_ static struct target_type flakey_target = { .name = "flakey", - .version = {1, 2, 0}, + .version = {1, 3, 0}, .module = THIS_MODULE, .ctr = flakey_ctr, .dtr = flakey_dtr, diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 1c46f97d6664..ea49834377c8 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -287,7 +287,8 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where, unsigned num_bvecs; sector_t remaining = where->count; struct request_queue *q = bdev_get_queue(where->bdev); - sector_t discard_sectors; + unsigned short logical_block_size = queue_logical_block_size(q); + sector_t num_sectors; /* * where->count may be zero if rw holds a flush and we need to @@ -297,7 +298,7 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where, /* * Allocate a suitably sized-bio. 
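The flakey conversion above is the template for the new per-bio-data mechanism used throughout these dm patches: a target declares its per-bio size in the constructor, dm core reserves that space alongside every bio it clones, and dm_per_bio_data() hands it back in map/end_io, replacing both the old map_context fields and per-target mempools. A minimal sketch of a target using it (demo_ names are placeholders):

#include <linux/device-mapper.h>

struct demo_pb {
	bool submitted_while_down;
};

static int demo_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
	ti->per_bio_data_size = sizeof(struct demo_pb);	/* reserve space */
	return 0;
}

static int demo_map(struct dm_target *ti, struct bio *bio)
{
	struct demo_pb *pb = dm_per_bio_data(bio, sizeof(struct demo_pb));

	pb->submitted_while_down = false;	/* no mempool alloc, cannot fail */
	return DM_MAPIO_REMAPPED;
}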
*/ - if (rw & REQ_DISCARD) + if ((rw & REQ_DISCARD) || (rw & REQ_WRITE_SAME)) num_bvecs = 1; else num_bvecs = min_t(int, bio_get_nr_vecs(where->bdev), @@ -310,9 +311,21 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where, store_io_and_region_in_bio(bio, io, region); if (rw & REQ_DISCARD) { - discard_sectors = min_t(sector_t, q->limits.max_discard_sectors, remaining); - bio->bi_size = discard_sectors << SECTOR_SHIFT; - remaining -= discard_sectors; + num_sectors = min_t(sector_t, q->limits.max_discard_sectors, remaining); + bio->bi_size = num_sectors << SECTOR_SHIFT; + remaining -= num_sectors; + } else if (rw & REQ_WRITE_SAME) { + /* + * WRITE SAME only uses a single page. + */ + dp->get_page(dp, &page, &len, &offset); + bio_add_page(bio, page, logical_block_size, offset); + num_sectors = min_t(sector_t, q->limits.max_write_same_sectors, remaining); + bio->bi_size = num_sectors << SECTOR_SHIFT; + + offset = 0; + remaining -= num_sectors; + dp->next_page(dp); } else while (remaining) { /* * Try and add as many pages as possible. diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index afd95986d099..0666b5d14b88 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1543,7 +1543,21 @@ static int check_version(unsigned int cmd, struct dm_ioctl __user *user) return r; } -static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param) +#define DM_PARAMS_VMALLOC 0x0001 /* Params alloced with vmalloc not kmalloc */ +#define DM_WIPE_BUFFER 0x0010 /* Wipe input buffer before returning from ioctl */ + +static void free_params(struct dm_ioctl *param, size_t param_size, int param_flags) +{ + if (param_flags & DM_WIPE_BUFFER) + memset(param, 0, param_size); + + if (param_flags & DM_PARAMS_VMALLOC) + vfree(param); + else + kfree(param); +} + +static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param, int *param_flags) { struct dm_ioctl tmp, *dmi; int secure_data; @@ -1556,7 +1570,21 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param) secure_data = tmp.flags & DM_SECURE_DATA_FLAG; - dmi = vmalloc(tmp.data_size); + *param_flags = secure_data ? DM_WIPE_BUFFER : 0; + + /* + * Try to avoid low memory issues when a device is suspended. + * Use kmalloc() rather than vmalloc() when we can. + */ + dmi = NULL; + if (tmp.data_size <= KMALLOC_MAX_SIZE) + dmi = kmalloc(tmp.data_size, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN); + + if (!dmi) { + dmi = __vmalloc(tmp.data_size, GFP_NOIO | __GFP_REPEAT | __GFP_HIGH, PAGE_KERNEL); + *param_flags |= DM_PARAMS_VMALLOC; + } + if (!dmi) { if (secure_data && clear_user(user, tmp.data_size)) return -EFAULT; @@ -1566,6 +1594,14 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param) if (copy_from_user(dmi, user, tmp.data_size)) goto bad; + /* + * Abort if something changed the ioctl data while it was being copied. 
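The copy_params() change above prefers a cheap, non-sleeping kmalloc() and falls back to __vmalloc() only when the buffer is too big or memory is fragmented, recording which allocator succeeded so the free side can match it. A reduced sketch of the idiom (3.8-era three-argument __vmalloc() signature):

#include <linux/slab.h>
#include <linux/vmalloc.h>

#define DEMO_VMALLOC 0x1		/* mirrors DM_PARAMS_VMALLOC */

static void *demo_alloc(size_t size, int *flags)
{
	void *p = NULL;

	if (size <= KMALLOC_MAX_SIZE)
		p = kmalloc(size, GFP_NOIO | __GFP_NORETRY |
			    __GFP_NOMEMALLOC | __GFP_NOWARN);
	if (!p) {
		p = __vmalloc(size, GFP_NOIO | __GFP_HIGH, PAGE_KERNEL);
		if (p)
			*flags |= DEMO_VMALLOC;
	}
	return p;
}

static void demo_free(void *p, int flags)
{
	if (flags & DEMO_VMALLOC)
		vfree(p);
	else
		kfree(p);
}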
+ */ + if (dmi->data_size != tmp.data_size) { + DMERR("rejecting ioctl: data size modified while processing parameters"); + goto bad; + } + /* Wipe the user buffer so we do not return it to userspace */ if (secure_data && clear_user(user, tmp.data_size)) goto bad; @@ -1574,9 +1610,8 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param) return 0; bad: - if (secure_data) - memset(dmi, 0, tmp.data_size); - vfree(dmi); + free_params(dmi, tmp.data_size, *param_flags); + return -EFAULT; } @@ -1613,7 +1648,7 @@ static int validate_params(uint cmd, struct dm_ioctl *param) static int ctl_ioctl(uint command, struct dm_ioctl __user *user) { int r = 0; - int wipe_buffer; + int param_flags; unsigned int cmd; struct dm_ioctl *uninitialized_var(param); ioctl_fn fn = NULL; @@ -1649,24 +1684,14 @@ static int ctl_ioctl(uint command, struct dm_ioctl __user *user) } /* - * Trying to avoid low memory issues when a device is - * suspended. - */ - current->flags |= PF_MEMALLOC; - - /* * Copy the parameters into kernel space. */ - r = copy_params(user, ¶m); - - current->flags &= ~PF_MEMALLOC; + r = copy_params(user, ¶m, ¶m_flags); if (r) return r; input_param_size = param->data_size; - wipe_buffer = param->flags & DM_SECURE_DATA_FLAG; - r = validate_params(cmd, param); if (r) goto out; @@ -1681,10 +1706,7 @@ static int ctl_ioctl(uint command, struct dm_ioctl __user *user) r = -EFAULT; out: - if (wipe_buffer) - memset(param, 0, input_param_size); - - vfree(param); + free_params(param, input_param_size, param_flags); return r; } diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index bed444c93d8d..68c02673263b 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -349,7 +349,7 @@ static void complete_io(unsigned long error, void *context) struct dm_kcopyd_client *kc = job->kc; if (error) { - if (job->rw == WRITE) + if (job->rw & WRITE) job->write_err |= error; else job->read_err = 1; @@ -361,7 +361,7 @@ static void complete_io(unsigned long error, void *context) } } - if (job->rw == WRITE) + if (job->rw & WRITE) push(&kc->complete_jobs, job); else { @@ -432,7 +432,7 @@ static int process_jobs(struct list_head *jobs, struct dm_kcopyd_client *kc, if (r < 0) { /* error this rogue job */ - if (job->rw == WRITE) + if (job->rw & WRITE) job->write_err = (unsigned long) -1L; else job->read_err = 1; @@ -585,6 +585,7 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, unsigned int flags, dm_kcopyd_notify_fn fn, void *context) { struct kcopyd_job *job; + int i; /* * Allocate an array of jobs consisting of one master job @@ -611,7 +612,16 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, memset(&job->source, 0, sizeof job->source); job->source.count = job->dests[0].count; job->pages = &zero_page_list; - job->rw = WRITE; + + /* + * Use WRITE SAME to optimize zeroing if all dests support it. 
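A side effect of carrying REQ_WRITE_SAME in job->rw shows up in the kcopyd hunks above: an equality test against WRITE no longer matches a flagged write, so every direction check becomes a bitwise test. In sketch form:

#include <linux/fs.h>		/* WRITE */
#include <linux/blk_types.h>	/* REQ_WRITE_SAME */

static bool demo_is_write(unsigned long rw)
{
	/* True for WRITE and for WRITE | REQ_WRITE_SAME alike. */
	return rw & WRITE;
}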
+ */ + job->rw = WRITE | REQ_WRITE_SAME; + for (i = 0; i < job->num_dests; i++) + if (!bdev_write_same(job->dests[i].bdev)) { + job->rw = WRITE; + break; + } } job->fn = fn; diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 1bf19a93eef0..328cad5617ab 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -55,6 +55,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->num_flush_requests = 1; ti->num_discard_requests = 1; + ti->num_write_same_requests = 1; ti->private = lc; return 0; @@ -87,8 +88,7 @@ static void linear_map_bio(struct dm_target *ti, struct bio *bio) bio->bi_sector = linear_map_sector(ti, bio->bi_sector); } -static int linear_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int linear_map(struct dm_target *ti, struct bio *bio) { linear_map_bio(ti, bio); @@ -155,7 +155,7 @@ static int linear_iterate_devices(struct dm_target *ti, static struct target_type linear_target = { .name = "linear", - .version = {1, 1, 0}, + .version = {1, 2, 0}, .module = THIS_MODULE, .ctr = linear_ctr, .dtr = linear_dtr, diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 45d94a7e7f6d..3d8984edeff7 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -295,9 +295,11 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size) * Choose a reasonable default. All figures in sectors. */ if (min_region_size > (1 << 13)) { + /* If not a power of 2, make it the next power of 2 */ + if (min_region_size & (min_region_size - 1)) + region_size = 1 << fls(region_size); DMINFO("Choosing default region size of %lu sectors", region_size); - region_size = min_region_size; } else { DMINFO("Choosing default region size of 4MiB"); region_size = 1 << 13; /* sectors */ @@ -1216,7 +1218,7 @@ static void raid_dtr(struct dm_target *ti) context_free(rs); } -static int raid_map(struct dm_target *ti, struct bio *bio, union map_info *map_context) +static int raid_map(struct dm_target *ti, struct bio *bio) { struct raid_set *rs = ti->private; struct mddev *mddev = &rs->md; @@ -1430,7 +1432,7 @@ static void raid_resume(struct dm_target *ti) static struct target_type raid_target = { .name = "raid", - .version = {1, 3, 1}, + .version = {1, 4, 0}, .module = THIS_MODULE, .ctr = raid_ctr, .dtr = raid_dtr, diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index fd61f98ee1f6..fa519185ebba 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -61,7 +61,6 @@ struct mirror_set { struct dm_region_hash *rh; struct dm_kcopyd_client *kcopyd_client; struct dm_io_client *io_client; - mempool_t *read_record_pool; /* recovery */ region_t nr_regions; @@ -139,14 +138,13 @@ static void dispatch_bios(void *context, struct bio_list *bio_list) queue_bio(ms, bio, WRITE); } -#define MIN_READ_RECORDS 20 -struct dm_raid1_read_record { +struct dm_raid1_bio_record { struct mirror *m; + /* if details->bi_bdev == NULL, details were not saved */ struct dm_bio_details details; + region_t write_region; }; -static struct kmem_cache *_dm_raid1_read_record_cache; - /* * Every mirror should look like this one. 
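The raid target fix above rounds a non-power-of-two region size up with fls(): fls(x) is the 1-based index of the highest set bit, so 1 << fls(x) is the next power of two strictly above any x that is not already one. For example fls(5000) == 13, so 5000 sectors rounds up to 8192. As a hypothetical helper:

#include <linux/bitops.h>

/* Round up to a power of two; exact powers of two pass through. */
static unsigned long demo_next_pow2(unsigned long x)
{
	return (x & (x - 1)) ? 1UL << fls(x) : x;
}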
*/ @@ -876,19 +874,9 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors, atomic_set(&ms->suspend, 0); atomic_set(&ms->default_mirror, DEFAULT_MIRROR); - ms->read_record_pool = mempool_create_slab_pool(MIN_READ_RECORDS, - _dm_raid1_read_record_cache); - - if (!ms->read_record_pool) { - ti->error = "Error creating mirror read_record_pool"; - kfree(ms); - return NULL; - } - ms->io_client = dm_io_client_create(); if (IS_ERR(ms->io_client)) { ti->error = "Error creating dm_io client"; - mempool_destroy(ms->read_record_pool); kfree(ms); return NULL; } @@ -900,7 +888,6 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors, if (IS_ERR(ms->rh)) { ti->error = "Error creating dirty region hash"; dm_io_client_destroy(ms->io_client); - mempool_destroy(ms->read_record_pool); kfree(ms); return NULL; } @@ -916,7 +903,6 @@ static void free_context(struct mirror_set *ms, struct dm_target *ti, dm_io_client_destroy(ms->io_client); dm_region_hash_destroy(ms->rh); - mempool_destroy(ms->read_record_pool); kfree(ms); } @@ -1088,6 +1074,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->num_flush_requests = 1; ti->num_discard_requests = 1; + ti->per_bio_data_size = sizeof(struct dm_raid1_bio_record); ti->discard_zeroes_data_unsupported = true; ms->kmirrord_wq = alloc_workqueue("kmirrord", @@ -1155,18 +1142,20 @@ static void mirror_dtr(struct dm_target *ti) /* * Mirror mapping function */ -static int mirror_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int mirror_map(struct dm_target *ti, struct bio *bio) { int r, rw = bio_rw(bio); struct mirror *m; struct mirror_set *ms = ti->private; - struct dm_raid1_read_record *read_record = NULL; struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); + struct dm_raid1_bio_record *bio_record = + dm_per_bio_data(bio, sizeof(struct dm_raid1_bio_record)); + + bio_record->details.bi_bdev = NULL; if (rw == WRITE) { /* Save region for mirror_end_io() handler */ - map_context->ll = dm_rh_bio_to_region(ms->rh, bio); + bio_record->write_region = dm_rh_bio_to_region(ms->rh, bio); queue_bio(ms, bio, rw); return DM_MAPIO_SUBMITTED; } @@ -1194,33 +1183,29 @@ static int mirror_map(struct dm_target *ti, struct bio *bio, if (unlikely(!m)) return -EIO; - read_record = mempool_alloc(ms->read_record_pool, GFP_NOIO); - if (likely(read_record)) { - dm_bio_record(&read_record->details, bio); - map_context->ptr = read_record; - read_record->m = m; - } + dm_bio_record(&bio_record->details, bio); + bio_record->m = m; map_bio(m, bio); return DM_MAPIO_REMAPPED; } -static int mirror_end_io(struct dm_target *ti, struct bio *bio, - int error, union map_info *map_context) +static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error) { int rw = bio_rw(bio); struct mirror_set *ms = (struct mirror_set *) ti->private; struct mirror *m = NULL; struct dm_bio_details *bd = NULL; - struct dm_raid1_read_record *read_record = map_context->ptr; + struct dm_raid1_bio_record *bio_record = + dm_per_bio_data(bio, sizeof(struct dm_raid1_bio_record)); /* * We need to dec pending if this was a write. 
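With per-bio data preallocated for every bio, mirror_map() above needs an explicit "nothing recorded" marker where a NULL mempool pointer used to serve, and it reuses details.bi_bdev == NULL for that, as the struct comment notes. The protocol in miniature (dm-bio-record.h is the private dm header providing dm_bio_record()):

#include <linux/bio.h>
#include "dm-bio-record.h"

struct demo_record {
	struct dm_bio_details details;	/* bi_bdev == NULL: nothing saved */
};

static void demo_map_side(struct demo_record *rec, struct bio *bio)
{
	rec->details.bi_bdev = NULL;		/* default: no retry state */
	if (bio_data_dir(bio) == READ)
		dm_bio_record(&rec->details, bio);	/* bi_bdev now set */
}

static bool demo_can_retry(const struct demo_record *rec)
{
	return rec->details.bi_bdev != NULL;
}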
*/ if (rw == WRITE) { if (!(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD))) - dm_rh_dec(ms->rh, map_context->ll); + dm_rh_dec(ms->rh, bio_record->write_region); return error; } @@ -1231,7 +1216,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, goto out; if (unlikely(error)) { - if (!read_record) { + if (!bio_record->details.bi_bdev) { /* * There wasn't enough memory to record necessary * information for a retry or there was no other @@ -1241,7 +1226,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, return -EIO; } - m = read_record->m; + m = bio_record->m; DMERR("Mirror read failed from %s. Trying alternative device.", m->dev->name); @@ -1253,22 +1238,18 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, * mirror. */ if (default_ok(m) || mirror_available(ms, bio)) { - bd = &read_record->details; + bd = &bio_record->details; dm_bio_restore(bd, bio); - mempool_free(read_record, ms->read_record_pool); - map_context->ptr = NULL; + bio_record->details.bi_bdev = NULL; queue_bio(ms, bio, rw); - return 1; + return DM_ENDIO_INCOMPLETE; } DMERR("All replicated volumes dead, failing I/O"); } out: - if (read_record) { - mempool_free(read_record, ms->read_record_pool); - map_context->ptr = NULL; - } + bio_record->details.bi_bdev = NULL; return error; } @@ -1422,7 +1403,7 @@ static int mirror_iterate_devices(struct dm_target *ti, static struct target_type mirror_target = { .name = "mirror", - .version = {1, 12, 1}, + .version = {1, 13, 1}, .module = THIS_MODULE, .ctr = mirror_ctr, .dtr = mirror_dtr, @@ -1439,13 +1420,6 @@ static int __init dm_mirror_init(void) { int r; - _dm_raid1_read_record_cache = KMEM_CACHE(dm_raid1_read_record, 0); - if (!_dm_raid1_read_record_cache) { - DMERR("Can't allocate dm_raid1_read_record cache"); - r = -ENOMEM; - goto bad_cache; - } - r = dm_register_target(&mirror_target); if (r < 0) { DMERR("Failed to register mirror target"); @@ -1455,15 +1429,12 @@ static int __init dm_mirror_init(void) return 0; bad_target: - kmem_cache_destroy(_dm_raid1_read_record_cache); -bad_cache: return r; } static void __exit dm_mirror_exit(void) { dm_unregister_target(&mirror_target); - kmem_cache_destroy(_dm_raid1_read_record_cache); } /* Module hooks */ diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index a143921feaf6..59fc18ae52c2 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -79,7 +79,6 @@ struct dm_snapshot { /* Chunks with outstanding reads */ spinlock_t tracked_chunk_lock; - mempool_t *tracked_chunk_pool; struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE]; /* The on disk metadata handler */ @@ -191,35 +190,38 @@ struct dm_snap_tracked_chunk { chunk_t chunk; }; -static struct kmem_cache *tracked_chunk_cache; +static void init_tracked_chunk(struct bio *bio) +{ + struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk)); + INIT_HLIST_NODE(&c->node); +} -static struct dm_snap_tracked_chunk *track_chunk(struct dm_snapshot *s, - chunk_t chunk) +static bool is_bio_tracked(struct bio *bio) { - struct dm_snap_tracked_chunk *c = mempool_alloc(s->tracked_chunk_pool, - GFP_NOIO); - unsigned long flags; + struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk)); + return !hlist_unhashed(&c->node); +} + +static void track_chunk(struct dm_snapshot *s, struct bio *bio, chunk_t chunk) +{ + struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk)); c->chunk = chunk; - spin_lock_irqsave(&s->tracked_chunk_lock, 
flags); + spin_lock_irq(&s->tracked_chunk_lock); hlist_add_head(&c->node, &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)]); - spin_unlock_irqrestore(&s->tracked_chunk_lock, flags); - - return c; + spin_unlock_irq(&s->tracked_chunk_lock); } -static void stop_tracking_chunk(struct dm_snapshot *s, - struct dm_snap_tracked_chunk *c) +static void stop_tracking_chunk(struct dm_snapshot *s, struct bio *bio) { + struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk)); unsigned long flags; spin_lock_irqsave(&s->tracked_chunk_lock, flags); hlist_del(&c->node); spin_unlock_irqrestore(&s->tracked_chunk_lock, flags); - - mempool_free(c, s->tracked_chunk_pool); } static int __chunk_is_tracked(struct dm_snapshot *s, chunk_t chunk) @@ -1120,14 +1122,6 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad_pending_pool; } - s->tracked_chunk_pool = mempool_create_slab_pool(MIN_IOS, - tracked_chunk_cache); - if (!s->tracked_chunk_pool) { - ti->error = "Could not allocate tracked_chunk mempool for " - "tracking reads"; - goto bad_tracked_chunk_pool; - } - for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++) INIT_HLIST_HEAD(&s->tracked_chunk_hash[i]); @@ -1135,6 +1129,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->private = s; ti->num_flush_requests = num_flush_requests; + ti->per_bio_data_size = sizeof(struct dm_snap_tracked_chunk); /* Add snapshot to the list of snapshots for this origin */ /* Exceptions aren't triggered till snapshot_resume() is called */ @@ -1183,9 +1178,6 @@ bad_read_metadata: unregister_snapshot(s); bad_load_and_register: - mempool_destroy(s->tracked_chunk_pool); - -bad_tracked_chunk_pool: mempool_destroy(s->pending_pool); bad_pending_pool: @@ -1290,8 +1282,6 @@ static void snapshot_dtr(struct dm_target *ti) BUG_ON(!hlist_empty(&s->tracked_chunk_hash[i])); #endif - mempool_destroy(s->tracked_chunk_pool); - __free_exceptions(s); mempool_destroy(s->pending_pool); @@ -1577,8 +1567,7 @@ static void remap_exception(struct dm_snapshot *s, struct dm_exception *e, s->store->chunk_mask); } -static int snapshot_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int snapshot_map(struct dm_target *ti, struct bio *bio) { struct dm_exception *e; struct dm_snapshot *s = ti->private; @@ -1586,6 +1575,8 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, chunk_t chunk; struct dm_snap_pending_exception *pe = NULL; + init_tracked_chunk(bio); + if (bio->bi_rw & REQ_FLUSH) { bio->bi_bdev = s->cow->bdev; return DM_MAPIO_REMAPPED; @@ -1670,7 +1661,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, } } else { bio->bi_bdev = s->origin->bdev; - map_context->ptr = track_chunk(s, chunk); + track_chunk(s, bio, chunk); } out_unlock: @@ -1691,20 +1682,20 @@ out: * If merging is currently taking place on the chunk in question, the * I/O is deferred by adding it to s->bios_queued_during_merge. 
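The tracked-chunk rework above likewise needs no separate flag or mempool: INIT_HLIST_NODE() leaves a node with a NULL pprev, hlist_add_head() links it in, and hlist_unhashed() tells the two states apart, so the per-bio structure doubles as its own "am I tracked" marker. In isolation:

#include <linux/list.h>

struct demo_chunk {
	struct hlist_node node;
};

static void demo_init(struct demo_chunk *c)
{
	INIT_HLIST_NODE(&c->node);		/* pprev == NULL: untracked */
}

static void demo_track(struct hlist_head *bucket, struct demo_chunk *c)
{
	hlist_add_head(&c->node, bucket);	/* hashed: tracked */
}

static bool demo_is_tracked(const struct demo_chunk *c)
{
	return !hlist_unhashed(&c->node);
}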
*/ -static int snapshot_merge_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int snapshot_merge_map(struct dm_target *ti, struct bio *bio) { struct dm_exception *e; struct dm_snapshot *s = ti->private; int r = DM_MAPIO_REMAPPED; chunk_t chunk; + init_tracked_chunk(bio); + if (bio->bi_rw & REQ_FLUSH) { - if (!map_context->target_request_nr) + if (!dm_bio_get_target_request_nr(bio)) bio->bi_bdev = s->origin->bdev; else bio->bi_bdev = s->cow->bdev; - map_context->ptr = NULL; return DM_MAPIO_REMAPPED; } @@ -1733,7 +1724,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio, remap_exception(s, e, bio, chunk); if (bio_rw(bio) == WRITE) - map_context->ptr = track_chunk(s, chunk); + track_chunk(s, bio, chunk); goto out_unlock; } @@ -1751,14 +1742,12 @@ out_unlock: return r; } -static int snapshot_end_io(struct dm_target *ti, struct bio *bio, - int error, union map_info *map_context) +static int snapshot_end_io(struct dm_target *ti, struct bio *bio, int error) { struct dm_snapshot *s = ti->private; - struct dm_snap_tracked_chunk *c = map_context->ptr; - if (c) - stop_tracking_chunk(s, c); + if (is_bio_tracked(bio)) + stop_tracking_chunk(s, bio); return 0; } @@ -2127,8 +2116,7 @@ static void origin_dtr(struct dm_target *ti) dm_put_device(ti, dev); } -static int origin_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int origin_map(struct dm_target *ti, struct bio *bio) { struct dm_dev *dev = ti->private; bio->bi_bdev = dev->bdev; @@ -2193,7 +2181,7 @@ static int origin_iterate_devices(struct dm_target *ti, static struct target_type origin_target = { .name = "snapshot-origin", - .version = {1, 7, 1}, + .version = {1, 8, 0}, .module = THIS_MODULE, .ctr = origin_ctr, .dtr = origin_dtr, @@ -2206,7 +2194,7 @@ static struct target_type origin_target = { static struct target_type snapshot_target = { .name = "snapshot", - .version = {1, 10, 0}, + .version = {1, 11, 0}, .module = THIS_MODULE, .ctr = snapshot_ctr, .dtr = snapshot_dtr, @@ -2220,7 +2208,7 @@ static struct target_type snapshot_target = { static struct target_type merge_target = { .name = dm_snapshot_merge_target_name, - .version = {1, 1, 0}, + .version = {1, 2, 0}, .module = THIS_MODULE, .ctr = snapshot_ctr, .dtr = snapshot_dtr, @@ -2281,17 +2269,8 @@ static int __init dm_snapshot_init(void) goto bad_pending_cache; } - tracked_chunk_cache = KMEM_CACHE(dm_snap_tracked_chunk, 0); - if (!tracked_chunk_cache) { - DMERR("Couldn't create cache to track chunks in use."); - r = -ENOMEM; - goto bad_tracked_chunk_cache; - } - return 0; -bad_tracked_chunk_cache: - kmem_cache_destroy(pending_cache); bad_pending_cache: kmem_cache_destroy(exception_cache); bad_exception_cache: @@ -2317,7 +2296,6 @@ static void __exit dm_snapshot_exit(void) exit_origin_hash(); kmem_cache_destroy(pending_cache); kmem_cache_destroy(exception_cache); - kmem_cache_destroy(tracked_chunk_cache); dm_exception_store_exit(); } diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index e2f876539743..c89cde86d400 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -162,6 +162,7 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->num_flush_requests = stripes; ti->num_discard_requests = stripes; + ti->num_write_same_requests = stripes; sc->chunk_size = chunk_size; if (chunk_size & (chunk_size - 1)) @@ -251,8 +252,8 @@ static void stripe_map_range_sector(struct stripe_c *sc, sector_t sector, *result += sc->chunk_size; /* next chunk */ } -static int 
stripe_map_discard(struct stripe_c *sc, struct bio *bio, - uint32_t target_stripe) +static int stripe_map_range(struct stripe_c *sc, struct bio *bio, + uint32_t target_stripe) { sector_t begin, end; @@ -271,23 +272,23 @@ static int stripe_map_discard(struct stripe_c *sc, struct bio *bio, } } -static int stripe_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int stripe_map(struct dm_target *ti, struct bio *bio) { struct stripe_c *sc = ti->private; uint32_t stripe; unsigned target_request_nr; if (bio->bi_rw & REQ_FLUSH) { - target_request_nr = map_context->target_request_nr; + target_request_nr = dm_bio_get_target_request_nr(bio); BUG_ON(target_request_nr >= sc->stripes); bio->bi_bdev = sc->stripe[target_request_nr].dev->bdev; return DM_MAPIO_REMAPPED; } - if (unlikely(bio->bi_rw & REQ_DISCARD)) { - target_request_nr = map_context->target_request_nr; + if (unlikely(bio->bi_rw & REQ_DISCARD) || + unlikely(bio->bi_rw & REQ_WRITE_SAME)) { + target_request_nr = dm_bio_get_target_request_nr(bio); BUG_ON(target_request_nr >= sc->stripes); - return stripe_map_discard(sc, bio, target_request_nr); + return stripe_map_range(sc, bio, target_request_nr); } stripe_map_sector(sc, bio->bi_sector, &stripe, &bio->bi_sector); @@ -342,8 +343,7 @@ static int stripe_status(struct dm_target *ti, status_type_t type, return 0; } -static int stripe_end_io(struct dm_target *ti, struct bio *bio, - int error, union map_info *map_context) +static int stripe_end_io(struct dm_target *ti, struct bio *bio, int error) { unsigned i; char major_minor[16]; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 100368eb7991..daf25d0890b3 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -967,13 +967,22 @@ bool dm_table_request_based(struct dm_table *t) int dm_table_alloc_md_mempools(struct dm_table *t) { unsigned type = dm_table_get_type(t); + unsigned per_bio_data_size = 0; + struct dm_target *tgt; + unsigned i; if (unlikely(type == DM_TYPE_NONE)) { DMWARN("no table type is set, can't allocate mempools"); return -EINVAL; } - t->mempools = dm_alloc_md_mempools(type, t->integrity_supported); + if (type == DM_TYPE_BIO_BASED) + for (i = 0; i < t->num_targets; i++) { + tgt = t->targets + i; + per_bio_data_size = max(per_bio_data_size, tgt->per_bio_data_size); + } + + t->mempools = dm_alloc_md_mempools(type, t->integrity_supported, per_bio_data_size); if (!t->mempools) return -ENOMEM; @@ -1414,6 +1423,33 @@ static bool dm_table_all_devices_attribute(struct dm_table *t, return 1; } +static int device_not_write_same_capable(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) +{ + struct request_queue *q = bdev_get_queue(dev->bdev); + + return q && !q->limits.max_write_same_sectors; +} + +static bool dm_table_supports_write_same(struct dm_table *t) +{ + struct dm_target *ti; + unsigned i = 0; + + while (i < dm_table_get_num_targets(t)) { + ti = dm_table_get_target(t, i++); + + if (!ti->num_write_same_requests) + return false; + + if (!ti->type->iterate_devices || + !ti->type->iterate_devices(ti, device_not_write_same_capable, NULL)) + return false; + } + + return true; +} + void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, struct queue_limits *limits) { @@ -1445,6 +1481,9 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, else queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, q); + if (!dm_table_supports_write_same(t)) + q->limits.max_write_same_sectors = 0; + dm_table_set_integrity(t); /* diff 
--git a/drivers/md/dm-target.c b/drivers/md/dm-target.c index 8da366cf381c..617d21a77256 100644 --- a/drivers/md/dm-target.c +++ b/drivers/md/dm-target.c @@ -126,15 +126,14 @@ static void io_err_dtr(struct dm_target *tt) /* empty */ } -static int io_err_map(struct dm_target *tt, struct bio *bio, - union map_info *map_context) +static int io_err_map(struct dm_target *tt, struct bio *bio) { return -EIO; } static struct target_type error_target = { .name = "error", - .version = {1, 0, 1}, + .version = {1, 1, 0}, .ctr = io_err_ctr, .dtr = io_err_dtr, .map = io_err_map, diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 693e149e9727..4d6e85367b84 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -408,7 +408,7 @@ static void __setup_btree_details(struct dm_pool_metadata *pmd) pmd->tl_info.tm = pmd->tm; pmd->tl_info.levels = 1; - pmd->tl_info.value_type.context = &pmd->info; + pmd->tl_info.value_type.context = &pmd->bl_info; pmd->tl_info.value_type.size = sizeof(__le64); pmd->tl_info.value_type.inc = subtree_inc; pmd->tl_info.value_type.dec = subtree_dec; diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 058acf3a5ba7..675ae5274016 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -186,7 +186,6 @@ struct pool { struct dm_thin_new_mapping *next_mapping; mempool_t *mapping_pool; - mempool_t *endio_hook_pool; process_bio_fn process_bio; process_bio_fn process_discard; @@ -304,7 +303,7 @@ static void __requeue_bio_list(struct thin_c *tc, struct bio_list *master) bio_list_init(master); while ((bio = bio_list_pop(&bios))) { - struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); if (h->tc == tc) bio_endio(bio, DM_ENDIO_REQUEUE); @@ -368,6 +367,17 @@ static int bio_triggers_commit(struct thin_c *tc, struct bio *bio) dm_thin_changed_this_transaction(tc->td); } +static void inc_all_io_entry(struct pool *pool, struct bio *bio) +{ + struct dm_thin_endio_hook *h; + + if (bio->bi_rw & REQ_DISCARD) + return; + + h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); + h->all_io_entry = dm_deferred_entry_inc(pool->all_io_ds); +} + static void issue(struct thin_c *tc, struct bio *bio) { struct pool *pool = tc->pool; @@ -474,7 +484,7 @@ static void copy_complete(int read_err, unsigned long write_err, void *context) static void overwrite_endio(struct bio *bio, int err) { unsigned long flags; - struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); struct dm_thin_new_mapping *m = h->overwrite_mapping; struct pool *pool = m->tc->pool; @@ -499,8 +509,7 @@ static void overwrite_endio(struct bio *bio, int err) /* * This sends the bios in the cell back to the deferred_bios list. */ -static void cell_defer(struct thin_c *tc, struct dm_bio_prison_cell *cell, - dm_block_t data_block) +static void cell_defer(struct thin_c *tc, struct dm_bio_prison_cell *cell) { struct pool *pool = tc->pool; unsigned long flags; @@ -513,17 +522,13 @@ static void cell_defer(struct thin_c *tc, struct dm_bio_prison_cell *cell, } /* - * Same as cell_defer above, except it omits one particular detainee, - * a write bio that covers the block and has already been processed. + * Same as cell_defer except it omits the original holder of the cell. 
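 *
 * An illustrative sketch of the calling pattern, mirroring the callers
 * later in this patch: the thread that created the cell is its holder,
 * finishes the holder bio itself, and releases only the waiters:
 *
 *	if (dm_bio_detain(tc->pool->prison, &key, bio, &cell))
 *		return;			(cell busy; bio queued inside it)
 *	... process the holder bio ...
 *	cell_defer_no_holder(tc, cell);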
*/ -static void cell_defer_except(struct thin_c *tc, struct dm_bio_prison_cell *cell) +static void cell_defer_no_holder(struct thin_c *tc, struct dm_bio_prison_cell *cell) { - struct bio_list bios; struct pool *pool = tc->pool; unsigned long flags; - bio_list_init(&bios); - spin_lock_irqsave(&pool->lock, flags); dm_cell_release_no_holder(cell, &pool->deferred_bios); spin_unlock_irqrestore(&pool->lock, flags); @@ -561,7 +566,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) */ r = dm_thin_insert_block(tc->td, m->virt_block, m->data_block); if (r) { - DMERR("dm_thin_insert_block() failed"); + DMERR_LIMIT("dm_thin_insert_block() failed"); dm_cell_error(m->cell); goto out; } @@ -573,10 +578,10 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) * the bios in the cell. */ if (bio) { - cell_defer_except(tc, m->cell); + cell_defer_no_holder(tc, m->cell); bio_endio(bio, 0); } else - cell_defer(tc, m->cell, m->data_block); + cell_defer(tc, m->cell); out: list_del(&m->list); @@ -588,8 +593,8 @@ static void process_prepared_discard_fail(struct dm_thin_new_mapping *m) struct thin_c *tc = m->tc; bio_io_error(m->bio); - cell_defer_except(tc, m->cell); - cell_defer_except(tc, m->cell2); + cell_defer_no_holder(tc, m->cell); + cell_defer_no_holder(tc, m->cell2); mempool_free(m, tc->pool->mapping_pool); } @@ -597,13 +602,15 @@ static void process_prepared_discard_passdown(struct dm_thin_new_mapping *m) { struct thin_c *tc = m->tc; + inc_all_io_entry(tc->pool, m->bio); + cell_defer_no_holder(tc, m->cell); + cell_defer_no_holder(tc, m->cell2); + if (m->pass_discard) remap_and_issue(tc, m->bio, m->data_block); else bio_endio(m->bio, 0); - cell_defer_except(tc, m->cell); - cell_defer_except(tc, m->cell2); mempool_free(m, tc->pool->mapping_pool); } @@ -614,7 +621,7 @@ static void process_prepared_discard(struct dm_thin_new_mapping *m) r = dm_thin_remove_block(tc->td, m->virt_block); if (r) - DMERR("dm_thin_remove_block() failed"); + DMERR_LIMIT("dm_thin_remove_block() failed"); process_prepared_discard_passdown(m); } @@ -706,11 +713,12 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, * bio immediately. Otherwise we use kcopyd to clone the data first. 
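 *
 * In outline (simplified from the code that follows):
 *
 *	if (io_overwrites_block(pool, bio)) {
 *		save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio);
 *		remap_and_issue(tc, bio, data_dest);	(bio carries the data)
 *	} else
 *		dm_kcopyd_copy(pool->copier, &from, 1, &to, 0,
 *			       copy_complete, m);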
*/ if (io_overwrites_block(pool, bio)) { - struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); h->overwrite_mapping = m; m->bio = bio; save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio); + inc_all_io_entry(pool, bio); remap_and_issue(tc, bio, data_dest); } else { struct dm_io_region from, to; @@ -727,7 +735,7 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, 0, copy_complete, m); if (r < 0) { mempool_free(m, pool->mapping_pool); - DMERR("dm_kcopyd_copy() failed"); + DMERR_LIMIT("dm_kcopyd_copy() failed"); dm_cell_error(cell); } } @@ -775,11 +783,12 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block, process_prepared_mapping(m); else if (io_overwrites_block(pool, bio)) { - struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); h->overwrite_mapping = m; m->bio = bio; save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio); + inc_all_io_entry(pool, bio); remap_and_issue(tc, bio, data_block); } else { int r; @@ -792,7 +801,7 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block, r = dm_kcopyd_zero(pool->copier, 1, &to, 0, copy_complete, m); if (r < 0) { mempool_free(m, pool->mapping_pool); - DMERR("dm_kcopyd_zero() failed"); + DMERR_LIMIT("dm_kcopyd_zero() failed"); dm_cell_error(cell); } } @@ -804,7 +813,7 @@ static int commit(struct pool *pool) r = dm_pool_commit_metadata(pool->pmd); if (r) - DMERR("commit failed, error = %d", r); + DMERR_LIMIT("commit failed: error = %d", r); return r; } @@ -889,7 +898,7 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result) */ static void retry_on_resume(struct bio *bio) { - struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); struct thin_c *tc = h->tc; struct pool *pool = tc->pool; unsigned long flags; @@ -936,7 +945,7 @@ static void process_discard(struct thin_c *tc, struct bio *bio) */ build_data_key(tc->td, lookup_result.block, &key2); if (dm_bio_detain(tc->pool->prison, &key2, bio, &cell2)) { - dm_cell_release_singleton(cell, bio); + cell_defer_no_holder(tc, cell); break; } @@ -962,13 +971,15 @@ static void process_discard(struct thin_c *tc, struct bio *bio) wake_worker(pool); } } else { + inc_all_io_entry(pool, bio); + cell_defer_no_holder(tc, cell); + cell_defer_no_holder(tc, cell2); + /* * The DM core makes sure that the discard doesn't span * a block boundary. So we submit the discard of a * partial block appropriately. */ - dm_cell_release_singleton(cell, bio); - dm_cell_release_singleton(cell2, bio); if ((!lookup_result.shared) && pool->pf.discard_passdown) remap_and_issue(tc, bio, lookup_result.block); else @@ -980,13 +991,14 @@ static void process_discard(struct thin_c *tc, struct bio *bio) /* * It isn't provisioned, just forget it. 
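 *
 * Discarding an unprovisioned block is a no-op, so the cell is released
 * without a holder and the bio completes via bio_endio(bio, 0) rather
 * than being treated as an error.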
*/ - dm_cell_release_singleton(cell, bio); + cell_defer_no_holder(tc, cell); bio_endio(bio, 0); break; default: - DMERR("discard: find block unexpectedly returned %d", r); - dm_cell_release_singleton(cell, bio); + DMERR_LIMIT("%s: dm_thin_find_block() failed: error = %d", + __func__, r); + cell_defer_no_holder(tc, cell); bio_io_error(bio); break; } @@ -1012,7 +1024,8 @@ static void break_sharing(struct thin_c *tc, struct bio *bio, dm_block_t block, break; default: - DMERR("%s: alloc_data_block() failed, error = %d", __func__, r); + DMERR_LIMIT("%s: alloc_data_block() failed: error = %d", + __func__, r); dm_cell_error(cell); break; } @@ -1037,11 +1050,12 @@ static void process_shared_bio(struct thin_c *tc, struct bio *bio, if (bio_data_dir(bio) == WRITE && bio->bi_size) break_sharing(tc, bio, block, &key, lookup_result, cell); else { - struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); h->shared_read_entry = dm_deferred_entry_inc(pool->shared_read_ds); + inc_all_io_entry(pool, bio); + cell_defer_no_holder(tc, cell); - dm_cell_release_singleton(cell, bio); remap_and_issue(tc, bio, lookup_result->block); } } @@ -1056,7 +1070,9 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block * Remap empty bios (flushes) immediately, without provisioning. */ if (!bio->bi_size) { - dm_cell_release_singleton(cell, bio); + inc_all_io_entry(tc->pool, bio); + cell_defer_no_holder(tc, cell); + remap_and_issue(tc, bio, 0); return; } @@ -1066,7 +1082,7 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block */ if (bio_data_dir(bio) == READ) { zero_fill_bio(bio); - dm_cell_release_singleton(cell, bio); + cell_defer_no_holder(tc, cell); bio_endio(bio, 0); return; } @@ -1085,7 +1101,8 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block break; default: - DMERR("%s: alloc_data_block() failed, error = %d", __func__, r); + DMERR_LIMIT("%s: alloc_data_block() failed: error = %d", + __func__, r); set_pool_mode(tc->pool, PM_READ_ONLY); dm_cell_error(cell); break; @@ -1111,34 +1128,31 @@ static void process_bio(struct thin_c *tc, struct bio *bio) r = dm_thin_find_block(tc->td, block, 1, &lookup_result); switch (r) { case 0: - /* - * We can release this cell now. This thread is the only - * one that puts bios into a cell, and we know there were - * no preceding bios. - */ - /* - * TODO: this will probably have to change when discard goes - * back in. 
- */ - dm_cell_release_singleton(cell, bio); - - if (lookup_result.shared) + if (lookup_result.shared) { process_shared_bio(tc, bio, block, &lookup_result); - else + cell_defer_no_holder(tc, cell); + } else { + inc_all_io_entry(tc->pool, bio); + cell_defer_no_holder(tc, cell); + remap_and_issue(tc, bio, lookup_result.block); + } break; case -ENODATA: if (bio_data_dir(bio) == READ && tc->origin_dev) { - dm_cell_release_singleton(cell, bio); + inc_all_io_entry(tc->pool, bio); + cell_defer_no_holder(tc, cell); + remap_to_origin_and_issue(tc, bio); } else provision_block(tc, bio, block, cell); break; default: - DMERR("dm_thin_find_block() failed, error = %d", r); - dm_cell_release_singleton(cell, bio); + DMERR_LIMIT("%s: dm_thin_find_block() failed: error = %d", + __func__, r); + cell_defer_no_holder(tc, cell); bio_io_error(bio); break; } @@ -1156,8 +1170,10 @@ static void process_bio_read_only(struct thin_c *tc, struct bio *bio) case 0: if (lookup_result.shared && (rw == WRITE) && bio->bi_size) bio_io_error(bio); - else + else { + inc_all_io_entry(tc->pool, bio); remap_and_issue(tc, bio, lookup_result.block); + } break; case -ENODATA: @@ -1167,6 +1183,7 @@ static void process_bio_read_only(struct thin_c *tc, struct bio *bio) } if (tc->origin_dev) { + inc_all_io_entry(tc->pool, bio); remap_to_origin_and_issue(tc, bio); break; } @@ -1176,7 +1193,8 @@ static void process_bio_read_only(struct thin_c *tc, struct bio *bio) break; default: - DMERR("dm_thin_find_block() failed, error = %d", r); + DMERR_LIMIT("%s: dm_thin_find_block() failed: error = %d", + __func__, r); bio_io_error(bio); break; } @@ -1207,7 +1225,7 @@ static void process_deferred_bios(struct pool *pool) spin_unlock_irqrestore(&pool->lock, flags); while ((bio = bio_list_pop(&bios))) { - struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); struct thin_c *tc = h->tc; /* @@ -1340,32 +1358,30 @@ static void thin_defer_bio(struct thin_c *tc, struct bio *bio) wake_worker(pool); } -static struct dm_thin_endio_hook *thin_hook_bio(struct thin_c *tc, struct bio *bio) +static void thin_hook_bio(struct thin_c *tc, struct bio *bio) { - struct pool *pool = tc->pool; - struct dm_thin_endio_hook *h = mempool_alloc(pool->endio_hook_pool, GFP_NOIO); + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); h->tc = tc; h->shared_read_entry = NULL; - h->all_io_entry = bio->bi_rw & REQ_DISCARD ? NULL : dm_deferred_entry_inc(pool->all_io_ds); + h->all_io_entry = NULL; h->overwrite_mapping = NULL; - - return h; } /* * Non-blocking function called from the thin target's map function. */ -static int thin_bio_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int thin_bio_map(struct dm_target *ti, struct bio *bio) { int r; struct thin_c *tc = ti->private; dm_block_t block = get_bio_block(tc, bio); struct dm_thin_device *td = tc->td; struct dm_thin_lookup_result result; + struct dm_bio_prison_cell *cell1, *cell2; + struct dm_cell_key key; - map_context->ptr = thin_hook_bio(tc, bio); + thin_hook_bio(tc, bio); if (get_pool_mode(tc->pool) == PM_FAIL) { bio_io_error(bio); @@ -1400,12 +1416,25 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio, * shared flag will be set in their case. 
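 *
 * The remap path below now detains the bio against both the virtual key
 * and the data key before issuing it, so a discard or a copy-on-write
 * break of the same block cannot race with the remapped io; both cells
 * are dropped again (sans holder) once the bio has been accounted via
 * inc_all_io_entry().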
*/ thin_defer_bio(tc, bio); - r = DM_MAPIO_SUBMITTED; - } else { - remap(tc, bio, result.block); - r = DM_MAPIO_REMAPPED; + return DM_MAPIO_SUBMITTED; } - break; + + build_virtual_key(tc->td, block, &key); + if (dm_bio_detain(tc->pool->prison, &key, bio, &cell1)) + return DM_MAPIO_SUBMITTED; + + build_data_key(tc->td, result.block, &key); + if (dm_bio_detain(tc->pool->prison, &key, bio, &cell2)) { + cell_defer_no_holder(tc, cell1); + return DM_MAPIO_SUBMITTED; + } + + inc_all_io_entry(tc->pool, bio); + cell_defer_no_holder(tc, cell2); + cell_defer_no_holder(tc, cell1); + + remap(tc, bio, result.block); + return DM_MAPIO_REMAPPED; case -ENODATA: if (get_pool_mode(tc->pool) == PM_READ_ONLY) { @@ -1414,8 +1443,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio, * of doing so. Just error it. */ bio_io_error(bio); - r = DM_MAPIO_SUBMITTED; - break; + return DM_MAPIO_SUBMITTED; } /* fall through */ @@ -1425,8 +1453,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio, * provide the hint to load the metadata into cache. */ thin_defer_bio(tc, bio); - r = DM_MAPIO_SUBMITTED; - break; + return DM_MAPIO_SUBMITTED; default: /* @@ -1435,11 +1462,8 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio, * pool is switched to fail-io mode. */ bio_io_error(bio); - r = DM_MAPIO_SUBMITTED; - break; + return DM_MAPIO_SUBMITTED; } - - return r; } static int pool_is_congested(struct dm_target_callbacks *cb, int bdi_bits) @@ -1566,14 +1590,12 @@ static void __pool_destroy(struct pool *pool) if (pool->next_mapping) mempool_free(pool->next_mapping, pool->mapping_pool); mempool_destroy(pool->mapping_pool); - mempool_destroy(pool->endio_hook_pool); dm_deferred_set_destroy(pool->shared_read_ds); dm_deferred_set_destroy(pool->all_io_ds); kfree(pool); } static struct kmem_cache *_new_mapping_cache; -static struct kmem_cache *_endio_hook_cache; static struct pool *pool_create(struct mapped_device *pool_md, struct block_device *metadata_dev, @@ -1667,13 +1689,6 @@ static struct pool *pool_create(struct mapped_device *pool_md, goto bad_mapping_pool; } - pool->endio_hook_pool = mempool_create_slab_pool(ENDIO_HOOK_POOL_SIZE, - _endio_hook_cache); - if (!pool->endio_hook_pool) { - *error = "Error creating pool's endio_hook mempool"; - err_p = ERR_PTR(-ENOMEM); - goto bad_endio_hook_pool; - } pool->ref_count = 1; pool->last_commit_jiffies = jiffies; pool->pool_md = pool_md; @@ -1682,8 +1697,6 @@ static struct pool *pool_create(struct mapped_device *pool_md, return pool; -bad_endio_hook_pool: - mempool_destroy(pool->mapping_pool); bad_mapping_pool: dm_deferred_set_destroy(pool->all_io_ds); bad_all_io_ds: @@ -1966,8 +1979,7 @@ out_unlock: return r; } -static int pool_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int pool_map(struct dm_target *ti, struct bio *bio) { int r; struct pool_c *pt = ti->private; @@ -2358,7 +2370,9 @@ static int pool_status(struct dm_target *ti, status_type_t type, else DMEMIT("rw "); - if (pool->pf.discard_enabled && pool->pf.discard_passdown) + if (!pool->pf.discard_enabled) + DMEMIT("ignore_discard"); + else if (pool->pf.discard_passdown) DMEMIT("discard_passdown"); else DMEMIT("no_discard_passdown"); @@ -2454,7 +2468,7 @@ static struct target_type pool_target = { .name = "thin-pool", .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | DM_TARGET_IMMUTABLE, - .version = {1, 5, 0}, + .version = {1, 6, 0}, .module = THIS_MODULE, .ctr = pool_ctr, .dtr = pool_dtr, @@ -2576,6 +2590,7 @@ static int thin_ctr(struct 
dm_target *ti, unsigned argc, char **argv) ti->num_flush_requests = 1; ti->flush_supported = true; + ti->per_bio_data_size = sizeof(struct dm_thin_endio_hook); /* In case the pool supports discards, pass them on. */ if (tc->pool->pf.discard_enabled) { @@ -2609,20 +2624,17 @@ out_unlock: return r; } -static int thin_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int thin_map(struct dm_target *ti, struct bio *bio) { bio->bi_sector = dm_target_offset(ti, bio->bi_sector); - return thin_bio_map(ti, bio, map_context); + return thin_bio_map(ti, bio); } -static int thin_endio(struct dm_target *ti, - struct bio *bio, int err, - union map_info *map_context) +static int thin_endio(struct dm_target *ti, struct bio *bio, int err) { unsigned long flags; - struct dm_thin_endio_hook *h = map_context->ptr; + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); struct list_head work; struct dm_thin_new_mapping *m, *tmp; struct pool *pool = h->tc->pool; @@ -2643,14 +2655,15 @@ static int thin_endio(struct dm_target *ti, if (h->all_io_entry) { INIT_LIST_HEAD(&work); dm_deferred_entry_dec(h->all_io_entry, &work); - spin_lock_irqsave(&pool->lock, flags); - list_for_each_entry_safe(m, tmp, &work, list) - list_add(&m->list, &pool->prepared_discards); - spin_unlock_irqrestore(&pool->lock, flags); + if (!list_empty(&work)) { + spin_lock_irqsave(&pool->lock, flags); + list_for_each_entry_safe(m, tmp, &work, list) + list_add(&m->list, &pool->prepared_discards); + spin_unlock_irqrestore(&pool->lock, flags); + wake_worker(pool); + } } - mempool_free(h, pool->endio_hook_pool); - return 0; } @@ -2745,7 +2758,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type thin_target = { .name = "thin", - .version = {1, 5, 0}, + .version = {1, 6, 0}, .module = THIS_MODULE, .ctr = thin_ctr, .dtr = thin_dtr, @@ -2779,14 +2792,8 @@ static int __init dm_thin_init(void) if (!_new_mapping_cache) goto bad_new_mapping_cache; - _endio_hook_cache = KMEM_CACHE(dm_thin_endio_hook, 0); - if (!_endio_hook_cache) - goto bad_endio_hook_cache; - return 0; -bad_endio_hook_cache: - kmem_cache_destroy(_new_mapping_cache); bad_new_mapping_cache: dm_unregister_target(&pool_target); bad_pool_target: @@ -2801,7 +2808,6 @@ static void dm_thin_exit(void) dm_unregister_target(&pool_target); kmem_cache_destroy(_new_mapping_cache); - kmem_cache_destroy(_endio_hook_cache); } module_init(dm_thin_init); diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index 9e7328bb4030..52cde982164a 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -55,7 +55,6 @@ struct dm_verity { unsigned shash_descsize;/* the size of temporary space for crypto */ int hash_failed; /* set to 1 if hash of any block failed */ - mempool_t *io_mempool; /* mempool of struct dm_verity_io */ mempool_t *vec_mempool; /* mempool of bio vector */ struct workqueue_struct *verify_wq; @@ -66,7 +65,6 @@ struct dm_verity { struct dm_verity_io { struct dm_verity *v; - struct bio *bio; /* original values of bio->bi_end_io and bio->bi_private */ bio_end_io_t *orig_bi_end_io; @@ -389,8 +387,8 @@ test_block_hash: */ static void verity_finish_io(struct dm_verity_io *io, int error) { - struct bio *bio = io->bio; struct dm_verity *v = io->v; + struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_bio_data_size); bio->bi_end_io = io->orig_bi_end_io; bio->bi_private = io->orig_bi_private; @@ -398,8 +396,6 @@ static void verity_finish_io(struct dm_verity_io *io, int 
error) if (io->io_vec != io->io_vec_inline) mempool_free(io->io_vec, v->vec_mempool); - mempool_free(io, v->io_mempool); - bio_endio(bio, error); } @@ -462,8 +458,7 @@ no_prefetch_cluster: * Bio map function. It allocates dm_verity_io structure and bio vector and * fills them. Then it issues prefetches and the I/O. */ -static int verity_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int verity_map(struct dm_target *ti, struct bio *bio) { struct dm_verity *v = ti->private; struct dm_verity_io *io; @@ -486,9 +481,8 @@ static int verity_map(struct dm_target *ti, struct bio *bio, if (bio_data_dir(bio) == WRITE) return -EIO; - io = mempool_alloc(v->io_mempool, GFP_NOIO); + io = dm_per_bio_data(bio, ti->per_bio_data_size); io->v = v; - io->bio = bio; io->orig_bi_end_io = bio->bi_end_io; io->orig_bi_private = bio->bi_private; io->block = bio->bi_sector >> (v->data_dev_block_bits - SECTOR_SHIFT); @@ -610,9 +604,6 @@ static void verity_dtr(struct dm_target *ti) if (v->vec_mempool) mempool_destroy(v->vec_mempool); - if (v->io_mempool) - mempool_destroy(v->io_mempool); - if (v->bufio) dm_bufio_client_destroy(v->bufio); @@ -841,13 +832,7 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } - v->io_mempool = mempool_create_kmalloc_pool(DM_VERITY_MEMPOOL_SIZE, - sizeof(struct dm_verity_io) + v->shash_descsize + v->digest_size * 2); - if (!v->io_mempool) { - ti->error = "Cannot allocate io mempool"; - r = -ENOMEM; - goto bad; - } + ti->per_bio_data_size = roundup(sizeof(struct dm_verity_io) + v->shash_descsize + v->digest_size * 2, __alignof__(struct dm_verity_io)); v->vec_mempool = mempool_create_kmalloc_pool(DM_VERITY_MEMPOOL_SIZE, BIO_MAX_PAGES * sizeof(struct bio_vec)); @@ -875,7 +860,7 @@ bad: static struct target_type verity_target = { .name = "verity", - .version = {1, 0, 0}, + .version = {1, 1, 0}, .module = THIS_MODULE, .ctr = verity_ctr, .dtr = verity_dtr, diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c index cc2b3cb81946..69a5c3b3b340 100644 --- a/drivers/md/dm-zero.c +++ b/drivers/md/dm-zero.c @@ -33,8 +33,7 @@ static int zero_ctr(struct dm_target *ti, unsigned int argc, char **argv) /* * Return zeros only on reads */ -static int zero_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int zero_map(struct dm_target *ti, struct bio *bio) { switch(bio_rw(bio)) { case READ: @@ -56,7 +55,7 @@ static int zero_map(struct dm_target *ti, struct bio *bio, static struct target_type zero_target = { .name = "zero", - .version = {1, 0, 0}, + .version = {1, 1, 0}, .module = THIS_MODULE, .ctr = zero_ctr, .map = zero_map, diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 77e6eff41cae..c72e4d5a9617 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -63,18 +63,6 @@ struct dm_io { }; /* - * For bio-based dm. - * One of these is allocated per target within a bio. Hopefully - * this will be simplified out one day. - */ -struct dm_target_io { - struct dm_io *io; - struct dm_target *ti; - union map_info info; - struct bio clone; -}; - -/* * For request-based dm. * One of these is allocated per request. 
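 *
 * (Bio-based dm no longer needs a separate per-target allocation: the
 * dm_target_io plus any target-requested per_bio_data now live in the
 * bioset front_pad, sized below as
 *	per_bio_data_size + offsetof(struct dm_target_io, clone),
 * so dm_per_bio_data() reduces to pointer arithmetic on the bio and
 * needs no allocation at map time.)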
*/ @@ -657,7 +645,7 @@ static void clone_endio(struct bio *bio, int error) error = -EIO; if (endio) { - r = endio(tio->ti, bio, error, &tio->info); + r = endio(tio->ti, bio, error); if (r < 0 || r == DM_ENDIO_REQUEUE) /* * error and requeue request are handled @@ -1016,7 +1004,7 @@ static void __map_bio(struct dm_target *ti, struct dm_target_io *tio) */ atomic_inc(&tio->io->io_count); sector = clone->bi_sector; - r = ti->type->map(ti, clone, &tio->info); + r = ti->type->map(ti, clone); if (r == DM_MAPIO_REMAPPED) { /* the bio has been remapped so dispatch it */ @@ -1111,6 +1099,7 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci, tio->io = ci->io; tio->ti = ti; memset(&tio->info, 0, sizeof(tio->info)); + tio->target_request_nr = 0; return tio; } @@ -1121,7 +1110,7 @@ static void __issue_target_request(struct clone_info *ci, struct dm_target *ti, struct dm_target_io *tio = alloc_tio(ci, ti, ci->bio->bi_max_vecs); struct bio *clone = &tio->clone; - tio->info.target_request_nr = request_nr; + tio->target_request_nr = request_nr; /* * Discard requests require the bio's inline iovecs be initialized. @@ -1174,7 +1163,28 @@ static void __clone_and_map_simple(struct clone_info *ci, struct dm_target *ti) ci->sector_count = 0; } -static int __clone_and_map_discard(struct clone_info *ci) +typedef unsigned (*get_num_requests_fn)(struct dm_target *ti); + +static unsigned get_num_discard_requests(struct dm_target *ti) +{ + return ti->num_discard_requests; +} + +static unsigned get_num_write_same_requests(struct dm_target *ti) +{ + return ti->num_write_same_requests; +} + +typedef bool (*is_split_required_fn)(struct dm_target *ti); + +static bool is_split_required_for_discard(struct dm_target *ti) +{ + return ti->split_discard_requests; +} + +static int __clone_and_map_changing_extent_only(struct clone_info *ci, + get_num_requests_fn get_num_requests, + is_split_required_fn is_split_required) { struct dm_target *ti; sector_t len; @@ -1185,15 +1195,15 @@ static int __clone_and_map_discard(struct clone_info *ci) return -EIO; /* - * Even though the device advertised discard support, - * that does not mean every target supports it, and + * Even though the device advertised support for this type of + * request, that does not mean every target supports it, and * reconfiguration might also have changed that since the * check was performed. 
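 *
 * The re-check also keeps this walk generic: discard and WRITE SAME
 * share it through their own hooks, e.g. (as wired up just below):
 *
 *	__clone_and_map_changing_extent_only(ci, get_num_discard_requests,
 *					     is_split_required_for_discard);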
*/ - if (!ti->num_discard_requests) + if (!get_num_requests || !get_num_requests(ti)) return -EOPNOTSUPP; - if (!ti->split_discard_requests) + if (is_split_required && !is_split_required(ti)) len = min(ci->sector_count, max_io_len_target_boundary(ci->sector, ti)); else len = min(ci->sector_count, max_io_len(ci->sector, ti)); @@ -1206,6 +1216,17 @@ static int __clone_and_map_discard(struct clone_info *ci) return 0; } +static int __clone_and_map_discard(struct clone_info *ci) +{ + return __clone_and_map_changing_extent_only(ci, get_num_discard_requests, + is_split_required_for_discard); +} + +static int __clone_and_map_write_same(struct clone_info *ci) +{ + return __clone_and_map_changing_extent_only(ci, get_num_write_same_requests, NULL); +} + static int __clone_and_map(struct clone_info *ci) { struct bio *bio = ci->bio; @@ -1215,6 +1236,8 @@ static int __clone_and_map(struct clone_info *ci) if (unlikely(bio->bi_rw & REQ_DISCARD)) return __clone_and_map_discard(ci); + else if (unlikely(bio->bi_rw & REQ_WRITE_SAME)) + return __clone_and_map_write_same(ci); ti = dm_table_find_target(ci->map, ci->sector); if (!dm_target_is_valid(ti)) @@ -1946,13 +1969,20 @@ static void free_dev(struct mapped_device *md) static void __bind_mempools(struct mapped_device *md, struct dm_table *t) { - struct dm_md_mempools *p; + struct dm_md_mempools *p = dm_table_get_md_mempools(t); - if (md->io_pool && (md->tio_pool || dm_table_get_type(t) == DM_TYPE_BIO_BASED) && md->bs) - /* the md already has necessary mempools */ + if (md->io_pool && (md->tio_pool || dm_table_get_type(t) == DM_TYPE_BIO_BASED) && md->bs) { + /* + * The md already has necessary mempools. Reload just the + * bioset because front_pad may have changed because + * a different table was loaded. + */ + bioset_free(md->bs); + md->bs = p->bs; + p->bs = NULL; goto out; + } - p = dm_table_get_md_mempools(t); BUG_ON(!p || md->io_pool || md->tio_pool || md->bs); md->io_pool = p->io_pool; @@ -2711,7 +2741,7 @@ int dm_noflush_suspending(struct dm_target *ti) } EXPORT_SYMBOL_GPL(dm_noflush_suspending); -struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity) +struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, unsigned per_bio_data_size) { struct dm_md_mempools *pools = kmalloc(sizeof(*pools), GFP_KERNEL); unsigned int pool_size = (type == DM_TYPE_BIO_BASED) ? 16 : MIN_IOS; @@ -2719,6 +2749,8 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity) if (!pools) return NULL; + per_bio_data_size = roundup(per_bio_data_size, __alignof__(struct dm_target_io)); + pools->io_pool = (type == DM_TYPE_BIO_BASED) ? mempool_create_slab_pool(MIN_IOS, _io_cache) : mempool_create_slab_pool(MIN_IOS, _rq_bio_info_cache); @@ -2734,7 +2766,7 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity) pools->bs = (type == DM_TYPE_BIO_BASED) ? 
bioset_create(pool_size, - offsetof(struct dm_target_io, clone)) : + per_bio_data_size + offsetof(struct dm_target_io, clone)) : bioset_create(pool_size, offsetof(struct dm_rq_clone_bio_info, clone)); if (!pools->bs) diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 6a99fefaa743..45b97da1bd06 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -159,7 +159,7 @@ void dm_kcopyd_exit(void); /* * Mempool operations */ -struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity); +struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, unsigned per_bio_data_size); void dm_free_md_mempools(struct dm_md_mempools *pools); #endif diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c index a3ae09124a67..28c3ed072a79 100644 --- a/drivers/md/persistent-data/dm-block-manager.c +++ b/drivers/md/persistent-data/dm-block-manager.c @@ -428,15 +428,17 @@ static int dm_bm_validate_buffer(struct dm_block_manager *bm, if (!v) return 0; r = v->check(v, (struct dm_block *) buf, dm_bufio_get_block_size(bm->bufio)); - if (unlikely(r)) + if (unlikely(r)) { + DMERR_LIMIT("%s validator check failed for block %llu", v->name, + (unsigned long long) dm_bufio_get_block_number(buf)); return r; + } aux->validator = v; } else { if (unlikely(aux->validator != v)) { - DMERR("validator mismatch (old=%s vs new=%s) for block %llu", - aux->validator->name, v ? v->name : "NULL", - (unsigned long long) - dm_bufio_get_block_number(buf)); + DMERR_LIMIT("validator mismatch (old=%s vs new=%s) for block %llu", + aux->validator->name, v ? v->name : "NULL", + (unsigned long long) dm_bufio_get_block_number(buf)); return -EINVAL; } } diff --git a/drivers/md/persistent-data/dm-btree-internal.h b/drivers/md/persistent-data/dm-btree-internal.h index 5709bfeab1e8..accbb05f17b6 100644 --- a/drivers/md/persistent-data/dm-btree-internal.h +++ b/drivers/md/persistent-data/dm-btree-internal.h @@ -36,13 +36,13 @@ struct node_header { __le32 padding; } __packed; -struct node { +struct btree_node { struct node_header header; __le64 keys[0]; } __packed; -void inc_children(struct dm_transaction_manager *tm, struct node *n, +void inc_children(struct dm_transaction_manager *tm, struct btree_node *n, struct dm_btree_value_type *vt); int new_block(struct dm_btree_info *info, struct dm_block **result); @@ -64,7 +64,7 @@ struct ro_spine { void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info); int exit_ro_spine(struct ro_spine *s); int ro_step(struct ro_spine *s, dm_block_t new_child); -struct node *ro_node(struct ro_spine *s); +struct btree_node *ro_node(struct ro_spine *s); struct shadow_spine { struct dm_btree_info *info; @@ -98,17 +98,17 @@ int shadow_root(struct shadow_spine *s); /* * Some inlines. */ -static inline __le64 *key_ptr(struct node *n, uint32_t index) +static inline __le64 *key_ptr(struct btree_node *n, uint32_t index) { return n->keys + index; } -static inline void *value_base(struct node *n) +static inline void *value_base(struct btree_node *n) { return &n->keys[le32_to_cpu(n->header.max_entries)]; } -static inline void *value_ptr(struct node *n, uint32_t index) +static inline void *value_ptr(struct btree_node *n, uint32_t index) { uint32_t value_size = le32_to_cpu(n->header.value_size); return value_base(n) + (value_size * index); @@ -117,7 +117,7 @@ static inline void *value_ptr(struct node *n, uint32_t index) /* * Assumes the values are suitably-aligned and converts to core format. 
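 *
 * e.g. an internal node's child pointer is read as (illustrative):
 *
 *	dm_block_t child = value64(n, i);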
*/ -static inline uint64_t value64(struct node *n, uint32_t index) +static inline uint64_t value64(struct btree_node *n, uint32_t index) { __le64 *values_le = value_base(n); @@ -127,7 +127,7 @@ static inline uint64_t value64(struct node *n, uint32_t index) /* * Searching for a key within a single node. */ -int lower_bound(struct node *n, uint64_t key); +int lower_bound(struct btree_node *n, uint64_t key); extern struct dm_block_validator btree_node_validator; diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c index aa71e2359a07..c4f28133ef82 100644 --- a/drivers/md/persistent-data/dm-btree-remove.c +++ b/drivers/md/persistent-data/dm-btree-remove.c @@ -53,7 +53,7 @@ /* * Some little utilities for moving node data around. */ -static void node_shift(struct node *n, int shift) +static void node_shift(struct btree_node *n, int shift) { uint32_t nr_entries = le32_to_cpu(n->header.nr_entries); uint32_t value_size = le32_to_cpu(n->header.value_size); @@ -79,7 +79,7 @@ static void node_shift(struct node *n, int shift) } } -static void node_copy(struct node *left, struct node *right, int shift) +static void node_copy(struct btree_node *left, struct btree_node *right, int shift) { uint32_t nr_left = le32_to_cpu(left->header.nr_entries); uint32_t value_size = le32_to_cpu(left->header.value_size); @@ -108,7 +108,7 @@ static void node_copy(struct node *left, struct node *right, int shift) /* * Delete a specific entry from a leaf node. */ -static void delete_at(struct node *n, unsigned index) +static void delete_at(struct btree_node *n, unsigned index) { unsigned nr_entries = le32_to_cpu(n->header.nr_entries); unsigned nr_to_copy = nr_entries - (index + 1); @@ -128,7 +128,7 @@ static void delete_at(struct node *n, unsigned index) n->header.nr_entries = cpu_to_le32(nr_entries - 1); } -static unsigned merge_threshold(struct node *n) +static unsigned merge_threshold(struct btree_node *n) { return le32_to_cpu(n->header.max_entries) / 3; } @@ -136,7 +136,7 @@ static unsigned merge_threshold(struct node *n) struct child { unsigned index; struct dm_block *block; - struct node *n; + struct btree_node *n; }; static struct dm_btree_value_type le64_type = { @@ -147,7 +147,7 @@ static struct dm_btree_value_type le64_type = { .equal = NULL }; -static int init_child(struct dm_btree_info *info, struct node *parent, +static int init_child(struct dm_btree_info *info, struct btree_node *parent, unsigned index, struct child *result) { int r, inc; @@ -177,7 +177,7 @@ static int exit_child(struct dm_btree_info *info, struct child *c) return dm_tm_unlock(info->tm, c->block); } -static void shift(struct node *left, struct node *right, int count) +static void shift(struct btree_node *left, struct btree_node *right, int count) { uint32_t nr_left = le32_to_cpu(left->header.nr_entries); uint32_t nr_right = le32_to_cpu(right->header.nr_entries); @@ -203,11 +203,11 @@ static void shift(struct node *left, struct node *right, int count) right->header.nr_entries = cpu_to_le32(nr_right + count); } -static void __rebalance2(struct dm_btree_info *info, struct node *parent, +static void __rebalance2(struct dm_btree_info *info, struct btree_node *parent, struct child *l, struct child *r) { - struct node *left = l->n; - struct node *right = r->n; + struct btree_node *left = l->n; + struct btree_node *right = r->n; uint32_t nr_left = le32_to_cpu(left->header.nr_entries); uint32_t nr_right = le32_to_cpu(right->header.nr_entries); unsigned threshold = 2 * merge_threshold(left) + 1; @@ 
-239,7 +239,7 @@ static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info, unsigned left_index) { int r; - struct node *parent; + struct btree_node *parent; struct child left, right; parent = dm_block_data(shadow_current(s)); @@ -270,9 +270,9 @@ static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info, * in right, then rebalance2. This wastes some cpu, but I want something * simple atm. */ -static void delete_center_node(struct dm_btree_info *info, struct node *parent, +static void delete_center_node(struct dm_btree_info *info, struct btree_node *parent, struct child *l, struct child *c, struct child *r, - struct node *left, struct node *center, struct node *right, + struct btree_node *left, struct btree_node *center, struct btree_node *right, uint32_t nr_left, uint32_t nr_center, uint32_t nr_right) { uint32_t max_entries = le32_to_cpu(left->header.max_entries); @@ -301,9 +301,9 @@ static void delete_center_node(struct dm_btree_info *info, struct node *parent, /* * Redistributes entries among 3 sibling nodes. */ -static void redistribute3(struct dm_btree_info *info, struct node *parent, +static void redistribute3(struct dm_btree_info *info, struct btree_node *parent, struct child *l, struct child *c, struct child *r, - struct node *left, struct node *center, struct node *right, + struct btree_node *left, struct btree_node *center, struct btree_node *right, uint32_t nr_left, uint32_t nr_center, uint32_t nr_right) { int s; @@ -343,12 +343,12 @@ static void redistribute3(struct dm_btree_info *info, struct node *parent, *key_ptr(parent, r->index) = right->keys[0]; } -static void __rebalance3(struct dm_btree_info *info, struct node *parent, +static void __rebalance3(struct dm_btree_info *info, struct btree_node *parent, struct child *l, struct child *c, struct child *r) { - struct node *left = l->n; - struct node *center = c->n; - struct node *right = r->n; + struct btree_node *left = l->n; + struct btree_node *center = c->n; + struct btree_node *right = r->n; uint32_t nr_left = le32_to_cpu(left->header.nr_entries); uint32_t nr_center = le32_to_cpu(center->header.nr_entries); @@ -371,7 +371,7 @@ static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info, unsigned left_index) { int r; - struct node *parent = dm_block_data(shadow_current(s)); + struct btree_node *parent = dm_block_data(shadow_current(s)); struct child left, center, right; /* @@ -421,7 +421,7 @@ static int get_nr_entries(struct dm_transaction_manager *tm, { int r; struct dm_block *block; - struct node *n; + struct btree_node *n; r = dm_tm_read_lock(tm, b, &btree_node_validator, &block); if (r) @@ -438,7 +438,7 @@ static int rebalance_children(struct shadow_spine *s, { int i, r, has_left_sibling, has_right_sibling; uint32_t child_entries; - struct node *n; + struct btree_node *n; n = dm_block_data(shadow_current(s)); @@ -483,7 +483,7 @@ static int rebalance_children(struct shadow_spine *s, return r; } -static int do_leaf(struct node *n, uint64_t key, unsigned *index) +static int do_leaf(struct btree_node *n, uint64_t key, unsigned *index) { int i = lower_bound(n, key); @@ -506,7 +506,7 @@ static int remove_raw(struct shadow_spine *s, struct dm_btree_info *info, uint64_t key, unsigned *index) { int i = *index, r; - struct node *n; + struct btree_node *n; for (;;) { r = shadow_step(s, root, vt); @@ -556,7 +556,7 @@ int dm_btree_remove(struct dm_btree_info *info, dm_block_t root, unsigned level, last_level = info->levels - 1; int index = 0, r = 0; struct shadow_spine spine; - struct node 
*n; + struct btree_node *n; init_shadow_spine(&spine, info); for (level = 0; level < info->levels; level++) { diff --git a/drivers/md/persistent-data/dm-btree-spine.c b/drivers/md/persistent-data/dm-btree-spine.c index d9a7912ee8ee..f199a0c4ed04 100644 --- a/drivers/md/persistent-data/dm-btree-spine.c +++ b/drivers/md/persistent-data/dm-btree-spine.c @@ -23,7 +23,7 @@ static void node_prepare_for_write(struct dm_block_validator *v, struct dm_block *b, size_t block_size) { - struct node *n = dm_block_data(b); + struct btree_node *n = dm_block_data(b); struct node_header *h = &n->header; h->blocknr = cpu_to_le64(dm_block_location(b)); @@ -38,15 +38,15 @@ static int node_check(struct dm_block_validator *v, struct dm_block *b, size_t block_size) { - struct node *n = dm_block_data(b); + struct btree_node *n = dm_block_data(b); struct node_header *h = &n->header; size_t value_size; __le32 csum_disk; uint32_t flags; if (dm_block_location(b) != le64_to_cpu(h->blocknr)) { - DMERR("node_check failed blocknr %llu wanted %llu", - le64_to_cpu(h->blocknr), dm_block_location(b)); + DMERR_LIMIT("node_check failed: blocknr %llu != wanted %llu", + le64_to_cpu(h->blocknr), dm_block_location(b)); return -ENOTBLK; } @@ -54,8 +54,8 @@ static int node_check(struct dm_block_validator *v, block_size - sizeof(__le32), BTREE_CSUM_XOR)); if (csum_disk != h->csum) { - DMERR("node_check failed csum %u wanted %u", - le32_to_cpu(csum_disk), le32_to_cpu(h->csum)); + DMERR_LIMIT("node_check failed: csum %u != wanted %u", + le32_to_cpu(csum_disk), le32_to_cpu(h->csum)); return -EILSEQ; } @@ -63,12 +63,12 @@ static int node_check(struct dm_block_validator *v, if (sizeof(struct node_header) + (sizeof(__le64) + value_size) * le32_to_cpu(h->max_entries) > block_size) { - DMERR("node_check failed: max_entries too large"); + DMERR_LIMIT("node_check failed: max_entries too large"); return -EILSEQ; } if (le32_to_cpu(h->nr_entries) > le32_to_cpu(h->max_entries)) { - DMERR("node_check failed, too many entries"); + DMERR_LIMIT("node_check failed: too many entries"); return -EILSEQ; } @@ -77,7 +77,7 @@ static int node_check(struct dm_block_validator *v, */ flags = le32_to_cpu(h->flags); if (!(flags & INTERNAL_NODE) && !(flags & LEAF_NODE)) { - DMERR("node_check failed, node is neither INTERNAL or LEAF"); + DMERR_LIMIT("node_check failed: node is neither INTERNAL or LEAF"); return -EILSEQ; } @@ -164,7 +164,7 @@ int ro_step(struct ro_spine *s, dm_block_t new_child) return r; } -struct node *ro_node(struct ro_spine *s) +struct btree_node *ro_node(struct ro_spine *s) { struct dm_block *block; diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index d12b2cc51f1a..4caf66918cdb 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -38,7 +38,7 @@ static void array_insert(void *base, size_t elt_size, unsigned nr_elts, /*----------------------------------------------------------------*/ /* makes the assumption that no two keys are the same. */ -static int bsearch(struct node *n, uint64_t key, int want_hi) +static int bsearch(struct btree_node *n, uint64_t key, int want_hi) { int lo = -1, hi = le32_to_cpu(n->header.nr_entries); @@ -58,12 +58,12 @@ static int bsearch(struct node *n, uint64_t key, int want_hi) return want_hi ? 
hi : lo; } -int lower_bound(struct node *n, uint64_t key) +int lower_bound(struct btree_node *n, uint64_t key) { return bsearch(n, key, 0); } -void inc_children(struct dm_transaction_manager *tm, struct node *n, +void inc_children(struct dm_transaction_manager *tm, struct btree_node *n, struct dm_btree_value_type *vt) { unsigned i; @@ -77,7 +77,7 @@ void inc_children(struct dm_transaction_manager *tm, struct node *n, vt->inc(vt->context, value_ptr(n, i)); } -static int insert_at(size_t value_size, struct node *node, unsigned index, +static int insert_at(size_t value_size, struct btree_node *node, unsigned index, uint64_t key, void *value) __dm_written_to_disk(value) { @@ -122,7 +122,7 @@ int dm_btree_empty(struct dm_btree_info *info, dm_block_t *root) { int r; struct dm_block *b; - struct node *n; + struct btree_node *n; size_t block_size; uint32_t max_entries; @@ -154,7 +154,7 @@ EXPORT_SYMBOL_GPL(dm_btree_empty); #define MAX_SPINE_DEPTH 64 struct frame { struct dm_block *b; - struct node *n; + struct btree_node *n; unsigned level; unsigned nr_children; unsigned current_child; @@ -230,6 +230,11 @@ static void pop_frame(struct del_stack *s) dm_tm_unlock(s->tm, f->b); } +static bool is_internal_level(struct dm_btree_info *info, struct frame *f) +{ + return f->level < (info->levels - 1); +} + int dm_btree_del(struct dm_btree_info *info, dm_block_t root) { int r; @@ -241,7 +246,7 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root) s->tm = info->tm; s->top = -1; - r = push_frame(s, root, 1); + r = push_frame(s, root, 0); if (r) goto out; @@ -267,7 +272,7 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root) if (r) goto out; - } else if (f->level != (info->levels - 1)) { + } else if (is_internal_level(info, f)) { b = value64(f->n, f->current_child); f->current_child++; r = push_frame(s, b, f->level + 1); @@ -295,7 +300,7 @@ EXPORT_SYMBOL_GPL(dm_btree_del); /*----------------------------------------------------------------*/ static int btree_lookup_raw(struct ro_spine *s, dm_block_t block, uint64_t key, - int (*search_fn)(struct node *, uint64_t), + int (*search_fn)(struct btree_node *, uint64_t), uint64_t *result_key, void *v, size_t value_size) { int i, r; @@ -406,7 +411,7 @@ static int btree_split_sibling(struct shadow_spine *s, dm_block_t root, size_t size; unsigned nr_left, nr_right; struct dm_block *left, *right, *parent; - struct node *ln, *rn, *pn; + struct btree_node *ln, *rn, *pn; __le64 location; left = shadow_current(s); @@ -491,7 +496,7 @@ static int btree_split_beneath(struct shadow_spine *s, uint64_t key) size_t size; unsigned nr_left, nr_right; struct dm_block *left, *right, *new_parent; - struct node *pn, *ln, *rn; + struct btree_node *pn, *ln, *rn; __le64 val; new_parent = shadow_current(s); @@ -576,7 +581,7 @@ static int btree_insert_raw(struct shadow_spine *s, dm_block_t root, uint64_t key, unsigned *index) { int r, i = *index, top = 1; - struct node *node; + struct btree_node *node; for (;;) { r = shadow_step(s, root, vt); @@ -643,7 +648,7 @@ static int insert(struct dm_btree_info *info, dm_block_t root, unsigned level, index = -1, last_level = info->levels - 1; dm_block_t block = root; struct shadow_spine spine; - struct node *n; + struct btree_node *n; struct dm_btree_value_type le64_type; le64_type.context = NULL; diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c index f3a9af8cdec3..3e7a88d99eb0 100644 --- a/drivers/md/persistent-data/dm-space-map-common.c +++ 
b/drivers/md/persistent-data/dm-space-map-common.c @@ -39,8 +39,8 @@ static int index_check(struct dm_block_validator *v, __le32 csum_disk; if (dm_block_location(b) != le64_to_cpu(mi_le->blocknr)) { - DMERR("index_check failed blocknr %llu wanted %llu", - le64_to_cpu(mi_le->blocknr), dm_block_location(b)); + DMERR_LIMIT("index_check failed: blocknr %llu != wanted %llu", + le64_to_cpu(mi_le->blocknr), dm_block_location(b)); return -ENOTBLK; } @@ -48,8 +48,8 @@ static int index_check(struct dm_block_validator *v, block_size - sizeof(__le32), INDEX_CSUM_XOR)); if (csum_disk != mi_le->csum) { - DMERR("index_check failed csum %u wanted %u", - le32_to_cpu(csum_disk), le32_to_cpu(mi_le->csum)); + DMERR_LIMIT("index_check failed: csum %u != wanted %u", + le32_to_cpu(csum_disk), le32_to_cpu(mi_le->csum)); return -EILSEQ; } @@ -89,8 +89,8 @@ static int bitmap_check(struct dm_block_validator *v, __le32 csum_disk; if (dm_block_location(b) != le64_to_cpu(disk_header->blocknr)) { - DMERR("bitmap check failed blocknr %llu wanted %llu", - le64_to_cpu(disk_header->blocknr), dm_block_location(b)); + DMERR_LIMIT("bitmap check failed: blocknr %llu != wanted %llu", + le64_to_cpu(disk_header->blocknr), dm_block_location(b)); return -ENOTBLK; } @@ -98,8 +98,8 @@ static int bitmap_check(struct dm_block_validator *v, block_size - sizeof(__le32), BITMAP_CSUM_XOR)); if (csum_disk != disk_header->csum) { - DMERR("bitmap check failed csum %u wanted %u", - le32_to_cpu(csum_disk), le32_to_cpu(disk_header->csum)); + DMERR_LIMIT("bitmap check failed: csum %u != wanted %u", + le32_to_cpu(csum_disk), le32_to_cpu(disk_header->csum)); return -EILSEQ; } diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index e89ae5e7a519..906cf3df71af 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -337,7 +337,7 @@ static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b) { int r = sm_metadata_new_block_(sm, b); if (r) - DMERR("out of metadata space"); + DMERR("unable to allocate new metadata block"); return r; } diff --git a/drivers/mfd/omap-usb-host.c b/drivers/mfd/omap-usb-host.c index 770a0d01e0b9..05164d7f054b 100644 --- a/drivers/mfd/omap-usb-host.c +++ b/drivers/mfd/omap-usb-host.c @@ -25,7 +25,6 @@ #include <linux/dma-mapping.h> #include <linux/spinlock.h> #include <linux/gpio.h> -#include <plat/cpu.h> #include <linux/platform_device.h> #include <linux/platform_data/usb-omap.h> #include <linux/pm_runtime.h> @@ -384,7 +383,7 @@ static void omap_usbhs_init(struct device *dev) reg &= ~OMAP_UHH_HOSTCONFIG_P3_CONNECT_STATUS; /* Bypass the TLL module for PHY mode operation */ - if (cpu_is_omap3430() && (omap_rev() <= OMAP3430_REV_ES2_1)) { + if (pdata->single_ulpi_bypass) { dev_dbg(dev, "OMAP3 ES version <= ES2.1\n"); if (is_ehci_phy_mode(pdata->port_mode[0]) || is_ehci_phy_mode(pdata->port_mode[1]) || diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c index 8d082b46426b..d971817182f7 100644 --- a/drivers/misc/sgi-xp/xpc_main.c +++ b/drivers/misc/sgi-xp/xpc_main.c @@ -53,6 +53,10 @@ #include <linux/kthread.h> #include "xpc.h" +#ifdef CONFIG_X86_64 +#include <asm/traps.h> +#endif + /* define two XPC debug device structures to be used with dev_dbg() et al */ struct device_driver xpc_dbg_name = { @@ -1079,6 +1083,9 @@ xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused) return NOTIFY_DONE; } +/* Used to only allow one cpu to complete 
disconnect */ +static unsigned int xpc_die_disconnecting; + /* * Notify other partitions to deactivate from us by first disengaging from all * references to our memory. @@ -1092,6 +1099,9 @@ xpc_die_deactivate(void) long keep_waiting; long wait_to_print; + if (cmpxchg(&xpc_die_disconnecting, 0, 1)) + return; + /* keep xpc_hb_checker thread from doing anything (just in case) */ xpc_exiting = 1; @@ -1159,7 +1169,7 @@ xpc_die_deactivate(void) * about the lack of a heartbeat. */ static int -xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused) +xpc_system_die(struct notifier_block *nb, unsigned long event, void *_die_args) { #ifdef CONFIG_IA64 /* !!! temporary kludge */ switch (event) { @@ -1191,7 +1201,27 @@ xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused) break; } #else - xpc_die_deactivate(); + struct die_args *die_args = _die_args; + + switch (event) { + case DIE_TRAP: + if (die_args->trapnr == X86_TRAP_DF) + xpc_die_deactivate(); + + if (((die_args->trapnr == X86_TRAP_MF) || + (die_args->trapnr == X86_TRAP_XF)) && + !user_mode_vm(die_args->regs)) + xpc_die_deactivate(); + + break; + case DIE_INT3: + case DIE_DEBUG: + break; + case DIE_OOPS: + case DIE_GPF: + default: + xpc_die_deactivate(); + } #endif return NOTIFY_DONE; diff --git a/drivers/mtd/ubi/attach.c b/drivers/mtd/ubi/attach.c index fec406b4553d..c071d410488f 100644 --- a/drivers/mtd/ubi/attach.c +++ b/drivers/mtd/ubi/attach.c @@ -322,7 +322,6 @@ static struct ubi_ainf_volume *add_volume(struct ubi_attach_info *ai, int ubi_compare_lebs(struct ubi_device *ubi, const struct ubi_ainf_peb *aeb, int pnum, const struct ubi_vid_hdr *vid_hdr) { - void *buf; int len, err, second_is_newer, bitflips = 0, corrupted = 0; uint32_t data_crc, crc; struct ubi_vid_hdr *vh = NULL; @@ -393,18 +392,14 @@ int ubi_compare_lebs(struct ubi_device *ubi, const struct ubi_ainf_peb *aeb, /* Read the data of the copy and check the CRC */ len = be32_to_cpu(vid_hdr->data_size); - buf = vmalloc(len); - if (!buf) { - err = -ENOMEM; - goto out_free_vidh; - } - err = ubi_io_read_data(ubi, buf, pnum, 0, len); + mutex_lock(&ubi->buf_mutex); + err = ubi_io_read_data(ubi, ubi->peb_buf, pnum, 0, len); if (err && err != UBI_IO_BITFLIPS && !mtd_is_eccerr(err)) - goto out_free_buf; + goto out_unlock; data_crc = be32_to_cpu(vid_hdr->data_crc); - crc = crc32(UBI_CRC32_INIT, buf, len); + crc = crc32(UBI_CRC32_INIT, ubi->peb_buf, len); if (crc != data_crc) { dbg_bld("PEB %d CRC error: calculated %#08x, must be %#08x", pnum, crc, data_crc); @@ -415,8 +410,8 @@ int ubi_compare_lebs(struct ubi_device *ubi, const struct ubi_ainf_peb *aeb, dbg_bld("PEB %d CRC is OK", pnum); bitflips = !!err; } + mutex_unlock(&ubi->buf_mutex); - vfree(buf); ubi_free_vid_hdr(ubi, vh); if (second_is_newer) @@ -426,8 +421,8 @@ int ubi_compare_lebs(struct ubi_device *ubi, const struct ubi_ainf_peb *aeb, return second_is_newer | (bitflips << 1) | (corrupted << 2); -out_free_buf: - vfree(buf); +out_unlock: + mutex_unlock(&ubi->buf_mutex); out_free_vidh: ubi_free_vid_hdr(ubi, vh); return err; @@ -1453,7 +1448,7 @@ int ubi_attach(struct ubi_device *ubi, int force_scan) goto out_wl; #ifdef CONFIG_MTD_UBI_FASTMAP - if (ubi->fm && ubi->dbg->chk_gen) { + if (ubi->fm && ubi_dbg_chk_gen(ubi)) { struct ubi_attach_info *scan_ai; scan_ai = alloc_ai("ubi_ckh_aeb_slab_cache"); @@ -1503,7 +1498,7 @@ static int self_check_ai(struct ubi_device *ubi, struct ubi_attach_info *ai) struct ubi_ainf_peb *aeb, *last_aeb; uint8_t *buf; - if (!ubi->dbg->chk_gen) + if 
(!ubi_dbg_chk_gen(ubi)) return 0; /* diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index 344b4cb49d4e..a56133585e92 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -825,8 +825,7 @@ static int autoresize(struct ubi_device *ubi, int vol_id) * No available PEBs to re-size the volume, clear the flag on * flash and exit. */ - memcpy(&vtbl_rec, &ubi->vtbl[vol_id], - sizeof(struct ubi_vtbl_record)); + vtbl_rec = ubi->vtbl[vol_id]; err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec); if (err) ubi_err("cannot clean auto-resize flag for volume %d", @@ -986,14 +985,10 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, if (!ubi->fm_buf) goto out_free; #endif - err = ubi_debugging_init_dev(ubi); - if (err) - goto out_free; - err = ubi_attach(ubi, 0); if (err) { ubi_err("failed to attach mtd%d, error %d", mtd->index, err); - goto out_debugging; + goto out_free; } if (ubi->autoresize_vol_id != -1) { @@ -1060,8 +1055,6 @@ out_detach: ubi_wl_close(ubi); ubi_free_internal_volumes(ubi); vfree(ubi->vtbl); -out_debugging: - ubi_debugging_exit_dev(ubi); out_free: vfree(ubi->peb_buf); vfree(ubi->fm_buf); @@ -1139,7 +1132,6 @@ int ubi_detach_mtd_dev(int ubi_num, int anyway) ubi_free_internal_volumes(ubi); vfree(ubi->vtbl); put_mtd_device(ubi->mtd); - ubi_debugging_exit_dev(ubi); vfree(ubi->peb_buf); vfree(ubi->fm_buf); ubi_msg("mtd%d is detached from ubi%d", ubi->mtd->index, ubi->ubi_num); diff --git a/drivers/mtd/ubi/debug.c b/drivers/mtd/ubi/debug.c index 26908a59506b..63cb1d7236ce 100644 --- a/drivers/mtd/ubi/debug.c +++ b/drivers/mtd/ubi/debug.c @@ -217,32 +217,6 @@ void ubi_dump_mkvol_req(const struct ubi_mkvol_req *req) pr_err("\t1st 16 characters of name: %s\n", nm); } -/** - * ubi_debugging_init_dev - initialize debugging for an UBI device. - * @ubi: UBI device description object - * - * This function initializes debugging-related data for UBI device @ubi. - * Returns zero in case of success and a negative error code in case of - * failure. - */ -int ubi_debugging_init_dev(struct ubi_device *ubi) -{ - ubi->dbg = kzalloc(sizeof(struct ubi_debug_info), GFP_KERNEL); - if (!ubi->dbg) - return -ENOMEM; - - return 0; -} - -/** - * ubi_debugging_exit_dev - free debugging data for an UBI device. - * @ubi: UBI device description object - */ -void ubi_debugging_exit_dev(struct ubi_device *ubi) -{ - kfree(ubi->dbg); -} - /* * Root directory for UBI stuff in debugfs. Contains sub-directories which * contain the stuff specific to particular UBI devices. 
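The UBI changes above and below replace a separately kzalloc()ed ubi_debug_info object with a struct embedded by value in the device descriptor, read through small inline accessors such as ubi_dbg_chk_gen(). A minimal standalone sketch of that pattern (illustrative names only, not UBI code):

#include <stdio.h>

/* Embedding the debug state by value removes the allocation, its
 * failure path, and the NULL-pointer case entirely. */
struct dbg_info {
	unsigned int chk_gen:1;
	unsigned int chk_io:1;
};

struct device_desc {
	int id;
	struct dbg_info dbg;	/* embedded, not a pointer */
};

/* Callers use an accessor rather than reaching into the field. */
static inline int dbg_chk_gen(const struct device_desc *d)
{
	return d->dbg.chk_gen;
}

int main(void)
{
	struct device_desc dev = { .id = 3, .dbg = { .chk_gen = 1 } };

	if (dbg_chk_gen(&dev))
		printf("extra self-checks enabled for device %d\n", dev.id);
	return 0;
}

This is why ubi_debugging_init_dev() and ubi_debugging_exit_dev() can be deleted outright: with the struct embedded there is nothing left to allocate or free.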
@@ -295,7 +269,7 @@ static ssize_t dfs_file_read(struct file *file, char __user *user_buf, ubi = ubi_get_device(ubi_num); if (!ubi) return -ENODEV; - d = ubi->dbg; + d = &ubi->dbg; if (dent == d->dfs_chk_gen) val = d->chk_gen; @@ -341,7 +315,7 @@ static ssize_t dfs_file_write(struct file *file, const char __user *user_buf, ubi = ubi_get_device(ubi_num); if (!ubi) return -ENODEV; - d = ubi->dbg; + d = &ubi->dbg; buf_size = min_t(size_t, count, (sizeof(buf) - 1)); if (copy_from_user(buf, user_buf, buf_size)) { @@ -398,7 +372,7 @@ int ubi_debugfs_init_dev(struct ubi_device *ubi) unsigned long ubi_num = ubi->ubi_num; const char *fname; struct dentry *dent; - struct ubi_debug_info *d = ubi->dbg; + struct ubi_debug_info *d = &ubi->dbg; if (!IS_ENABLED(CONFIG_DEBUG_FS)) return 0; @@ -471,5 +445,5 @@ out: void ubi_debugfs_exit_dev(struct ubi_device *ubi) { if (IS_ENABLED(CONFIG_DEBUG_FS)) - debugfs_remove_recursive(ubi->dbg->dfs_dir); + debugfs_remove_recursive(ubi->dbg.dfs_dir); } diff --git a/drivers/mtd/ubi/debug.h b/drivers/mtd/ubi/debug.h index 3dbc877d9663..33f8f3b2c9b2 100644 --- a/drivers/mtd/ubi/debug.h +++ b/drivers/mtd/ubi/debug.h @@ -60,51 +60,11 @@ void ubi_dump_aeb(const struct ubi_ainf_peb *aeb, int type); void ubi_dump_mkvol_req(const struct ubi_mkvol_req *req); int ubi_self_check_all_ff(struct ubi_device *ubi, int pnum, int offset, int len); -int ubi_debugging_init_dev(struct ubi_device *ubi); -void ubi_debugging_exit_dev(struct ubi_device *ubi); int ubi_debugfs_init(void); void ubi_debugfs_exit(void); int ubi_debugfs_init_dev(struct ubi_device *ubi); void ubi_debugfs_exit_dev(struct ubi_device *ubi); -/* - * The UBI debugfs directory name pattern and maximum name length (3 for "ubi" - * + 2 for the number plus 1 for the trailing zero byte. - */ -#define UBI_DFS_DIR_NAME "ubi%d" -#define UBI_DFS_DIR_LEN (3 + 2 + 1) - -/** - * struct ubi_debug_info - debugging information for an UBI device. - * - * @chk_gen: if UBI general extra checks are enabled - * @chk_io: if UBI I/O extra checks are enabled - * @disable_bgt: disable the background task for testing purposes - * @emulate_bitflips: emulate bit-flips for testing purposes - * @emulate_io_failures: emulate write/erase failures for testing purposes - * @dfs_dir_name: name of debugfs directory containing files of this UBI device - * @dfs_dir: direntry object of the UBI device debugfs directory - * @dfs_chk_gen: debugfs knob to enable UBI general extra checks - * @dfs_chk_io: debugfs knob to enable UBI I/O extra checks - * @dfs_disable_bgt: debugfs knob to disable the background task - * @dfs_emulate_bitflips: debugfs knob to emulate bit-flips - * @dfs_emulate_io_failures: debugfs knob to emulate write/erase failures - */ -struct ubi_debug_info { - unsigned int chk_gen:1; - unsigned int chk_io:1; - unsigned int disable_bgt:1; - unsigned int emulate_bitflips:1; - unsigned int emulate_io_failures:1; - char dfs_dir_name[UBI_DFS_DIR_LEN + 1]; - struct dentry *dfs_dir; - struct dentry *dfs_chk_gen; - struct dentry *dfs_chk_io; - struct dentry *dfs_disable_bgt; - struct dentry *dfs_emulate_bitflips; - struct dentry *dfs_emulate_io_failures; -}; - /** * ubi_dbg_is_bgt_disabled - if the background thread is disabled. 
* @ubi: UBI device description object @@ -114,7 +74,7 @@ struct ubi_debug_info { */ static inline int ubi_dbg_is_bgt_disabled(const struct ubi_device *ubi) { - return ubi->dbg->disable_bgt; + return ubi->dbg.disable_bgt; } /** @@ -125,7 +85,7 @@ static inline int ubi_dbg_is_bgt_disabled(const struct ubi_device *ubi) */ static inline int ubi_dbg_is_bitflip(const struct ubi_device *ubi) { - if (ubi->dbg->emulate_bitflips) + if (ubi->dbg.emulate_bitflips) return !(random32() % 200); return 0; } @@ -139,7 +99,7 @@ static inline int ubi_dbg_is_bitflip(const struct ubi_device *ubi) */ static inline int ubi_dbg_is_write_failure(const struct ubi_device *ubi) { - if (ubi->dbg->emulate_io_failures) + if (ubi->dbg.emulate_io_failures) return !(random32() % 500); return 0; } @@ -153,9 +113,18 @@ static inline int ubi_dbg_is_write_failure(const struct ubi_device *ubi) */ static inline int ubi_dbg_is_erase_failure(const struct ubi_device *ubi) { - if (ubi->dbg->emulate_io_failures) + if (ubi->dbg.emulate_io_failures) return !(random32() % 400); return 0; } +static inline int ubi_dbg_chk_io(const struct ubi_device *ubi) +{ + return ubi->dbg.chk_io; +} + +static inline int ubi_dbg_chk_gen(const struct ubi_device *ubi) +{ + return ubi->dbg.chk_gen; +} #endif /* !__UBI_DEBUG_H__ */ diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c index 1a5f53c090d4..0648c6996d43 100644 --- a/drivers/mtd/ubi/fastmap.c +++ b/drivers/mtd/ubi/fastmap.c @@ -814,10 +814,8 @@ static int ubi_attach_fastmap(struct ubi_device *ubi, if (max_sqnum > ai->max_sqnum) ai->max_sqnum = max_sqnum; - list_for_each_entry_safe(tmp_aeb, _tmp_aeb, &free, u.list) { - list_del(&tmp_aeb->u.list); - list_add_tail(&tmp_aeb->u.list, &ai->free); - } + list_for_each_entry_safe(tmp_aeb, _tmp_aeb, &free, u.list) + list_move_tail(&tmp_aeb->u.list, &ai->free); /* * If fastmap is leaking PEBs (must not happen), raise a diff --git a/drivers/mtd/ubi/gluebi.c b/drivers/mtd/ubi/gluebi.c index 4bd4db8c84c9..b93807b4c459 100644 --- a/drivers/mtd/ubi/gluebi.c +++ b/drivers/mtd/ubi/gluebi.c @@ -171,17 +171,17 @@ static void gluebi_put_device(struct mtd_info *mtd) static int gluebi_read(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, unsigned char *buf) { - int err = 0, lnum, offs, total_read; + int err = 0, lnum, offs, bytes_left; struct gluebi_device *gluebi; gluebi = container_of(mtd, struct gluebi_device, mtd); lnum = div_u64_rem(from, mtd->erasesize, &offs); - total_read = len; - while (total_read) { + bytes_left = len; + while (bytes_left) { size_t to_read = mtd->erasesize - offs; - if (to_read > total_read) - to_read = total_read; + if (to_read > bytes_left) + to_read = bytes_left; err = ubi_read(gluebi->desc, lnum, buf, offs, to_read); if (err) @@ -189,11 +189,11 @@ static int gluebi_read(struct mtd_info *mtd, loff_t from, size_t len, lnum += 1; offs = 0; - total_read -= to_read; + bytes_left -= to_read; buf += to_read; } - *retlen = len - total_read; + *retlen = len - bytes_left; return err; } @@ -211,7 +211,7 @@ static int gluebi_read(struct mtd_info *mtd, loff_t from, size_t len, static int gluebi_write(struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, const u_char *buf) { - int err = 0, lnum, offs, total_written; + int err = 0, lnum, offs, bytes_left; struct gluebi_device *gluebi; gluebi = container_of(mtd, struct gluebi_device, mtd); @@ -220,12 +220,12 @@ static int gluebi_write(struct mtd_info *mtd, loff_t to, size_t len, if (len % mtd->writesize || offs % mtd->writesize) return -EINVAL; - total_written = len; - 
while (total_written) { + bytes_left = len; + while (bytes_left) { size_t to_write = mtd->erasesize - offs; - if (to_write > total_written) - to_write = total_written; + if (to_write > bytes_left) + to_write = bytes_left; err = ubi_leb_write(gluebi->desc, lnum, buf, offs, to_write); if (err) @@ -233,11 +233,11 @@ static int gluebi_write(struct mtd_info *mtd, loff_t to, size_t len, lnum += 1; offs = 0; - total_written -= to_write; + bytes_left -= to_write; buf += to_write; } - *retlen = len - total_written; + *retlen = len - bytes_left; return err; } diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c index 78a1dcbf2107..bf79def40126 100644 --- a/drivers/mtd/ubi/io.c +++ b/drivers/mtd/ubi/io.c @@ -1132,7 +1132,7 @@ static int self_check_not_bad(const struct ubi_device *ubi, int pnum) { int err; - if (!ubi->dbg->chk_io) + if (!ubi_dbg_chk_io(ubi)) return 0; err = ubi_io_is_bad(ubi, pnum); @@ -1159,7 +1159,7 @@ static int self_check_ec_hdr(const struct ubi_device *ubi, int pnum, int err; uint32_t magic; - if (!ubi->dbg->chk_io) + if (!ubi_dbg_chk_io(ubi)) return 0; magic = be32_to_cpu(ec_hdr->magic); @@ -1197,7 +1197,7 @@ static int self_check_peb_ec_hdr(const struct ubi_device *ubi, int pnum) uint32_t crc, hdr_crc; struct ubi_ec_hdr *ec_hdr; - if (!ubi->dbg->chk_io) + if (!ubi_dbg_chk_io(ubi)) return 0; ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS); @@ -1241,7 +1241,7 @@ static int self_check_vid_hdr(const struct ubi_device *ubi, int pnum, int err; uint32_t magic; - if (!ubi->dbg->chk_io) + if (!ubi_dbg_chk_io(ubi)) return 0; magic = be32_to_cpu(vid_hdr->magic); @@ -1282,7 +1282,7 @@ static int self_check_peb_vid_hdr(const struct ubi_device *ubi, int pnum) struct ubi_vid_hdr *vid_hdr; void *p; - if (!ubi->dbg->chk_io) + if (!ubi_dbg_chk_io(ubi)) return 0; vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS); @@ -1334,7 +1334,7 @@ static int self_check_write(struct ubi_device *ubi, const void *buf, int pnum, void *buf1; loff_t addr = (loff_t)pnum * ubi->peb_size + offset; - if (!ubi->dbg->chk_io) + if (!ubi_dbg_chk_io(ubi)) return 0; buf1 = __vmalloc(len, GFP_NOFS, PAGE_KERNEL); @@ -1398,7 +1398,7 @@ int ubi_self_check_all_ff(struct ubi_device *ubi, int pnum, int offset, int len) void *buf; loff_t addr = (loff_t)pnum * ubi->peb_size + offset; - if (!ubi->dbg->chk_io) + if (!ubi_dbg_chk_io(ubi)) return 0; buf = __vmalloc(len, GFP_NOFS, PAGE_KERNEL); diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h index 7d57469723cf..8ea6297a208f 100644 --- a/drivers/mtd/ubi/ubi.h +++ b/drivers/mtd/ubi/ubi.h @@ -85,6 +85,13 @@ #define UBI_UNKNOWN -1 /* + * The UBI debugfs directory name pattern and maximum name length (3 for "ubi" + * + 2 for the number plus 1 for the trailing zero byte). + */ +#define UBI_DFS_DIR_NAME "ubi%d" +#define UBI_DFS_DIR_LEN (3 + 2 + 1) + +/* * Error codes returned by the I/O sub-system. * * UBI_IO_FF: the read region of flash contains only 0xFFs @@ -342,6 +349,37 @@ struct ubi_volume_desc { struct ubi_wl_entry; /** + * struct ubi_debug_info - debugging information for an UBI device.
+ * + * @chk_gen: if UBI general extra checks are enabled + * @chk_io: if UBI I/O extra checks are enabled + * @disable_bgt: disable the background task for testing purposes + * @emulate_bitflips: emulate bit-flips for testing purposes + * @emulate_io_failures: emulate write/erase failures for testing purposes + * @dfs_dir_name: name of debugfs directory containing files of this UBI device + * @dfs_dir: direntry object of the UBI device debugfs directory + * @dfs_chk_gen: debugfs knob to enable UBI general extra checks + * @dfs_chk_io: debugfs knob to enable UBI I/O extra checks + * @dfs_disable_bgt: debugfs knob to disable the background task + * @dfs_emulate_bitflips: debugfs knob to emulate bit-flips + * @dfs_emulate_io_failures: debugfs knob to emulate write/erase failures + */ +struct ubi_debug_info { + unsigned int chk_gen:1; + unsigned int chk_io:1; + unsigned int disable_bgt:1; + unsigned int emulate_bitflips:1; + unsigned int emulate_io_failures:1; + char dfs_dir_name[UBI_DFS_DIR_LEN + 1]; + struct dentry *dfs_dir; + struct dentry *dfs_chk_gen; + struct dentry *dfs_chk_io; + struct dentry *dfs_disable_bgt; + struct dentry *dfs_emulate_bitflips; + struct dentry *dfs_emulate_io_failures; +}; + +/** * struct ubi_device - UBI device description structure * @dev: UBI device object to use with the Linux device model * @cdev: character device object to create character device @@ -545,7 +583,7 @@ struct ubi_device { struct mutex buf_mutex; struct mutex ckvol_mutex; - struct ubi_debug_info *dbg; + struct ubi_debug_info dbg; }; /** diff --git a/drivers/mtd/ubi/upd.c b/drivers/mtd/ubi/upd.c index 9f2ebd8750e7..ec2c2dc1c1ca 100644 --- a/drivers/mtd/ubi/upd.c +++ b/drivers/mtd/ubi/upd.c @@ -64,8 +64,7 @@ static int set_update_marker(struct ubi_device *ubi, struct ubi_volume *vol) return 0; } - memcpy(&vtbl_rec, &ubi->vtbl[vol->vol_id], - sizeof(struct ubi_vtbl_record)); + vtbl_rec = ubi->vtbl[vol->vol_id]; vtbl_rec.upd_marker = 1; mutex_lock(&ubi->device_mutex); @@ -93,8 +92,7 @@ static int clear_update_marker(struct ubi_device *ubi, struct ubi_volume *vol, dbg_gen("clear update marker for volume %d", vol->vol_id); - memcpy(&vtbl_rec, &ubi->vtbl[vol->vol_id], - sizeof(struct ubi_vtbl_record)); + vtbl_rec = ubi->vtbl[vol->vol_id]; ubi_assert(vol->upd_marker && vtbl_rec.upd_marker); vtbl_rec.upd_marker = 0; diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c index 9169e58c262e..8330703c098f 100644 --- a/drivers/mtd/ubi/vmt.c +++ b/drivers/mtd/ubi/vmt.c @@ -535,7 +535,7 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs) } /* Change volume table record */ - memcpy(&vtbl_rec, &ubi->vtbl[vol_id], sizeof(struct ubi_vtbl_record)); + vtbl_rec = ubi->vtbl[vol_id]; vtbl_rec.reserved_pebs = cpu_to_be32(reserved_pebs); err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec); if (err) @@ -847,7 +847,7 @@ static int self_check_volumes(struct ubi_device *ubi) { int i, err = 0; - if (!ubi->dbg->chk_gen) + if (!ubi_dbg_chk_gen(ubi)) return 0; for (i = 0; i < ubi->vtbl_slots; i++) { diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c index 926e3df14fb2..d77b1c1d7c72 100644 --- a/drivers/mtd/ubi/vtbl.c +++ b/drivers/mtd/ubi/vtbl.c @@ -858,7 +858,7 @@ out_free: */ static void self_vtbl_check(const struct ubi_device *ubi) { - if (!ubi->dbg->chk_gen) + if (!ubi_dbg_chk_gen(ubi)) return; if (vtbl_check(ubi, ubi->vtbl)) { diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 2144f611196e..5df49d3cb5c7 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -1,5 +1,4
@@ /* - * @ubi: UBI device description object * Copyright (c) International Business Machines Corp., 2006 * * This program is free software; you can redistribute it and/or modify @@ -2050,7 +2049,7 @@ static int self_check_ec(struct ubi_device *ubi, int pnum, int ec) long long read_ec; struct ubi_ec_hdr *ec_hdr; - if (!ubi->dbg->chk_gen) + if (!ubi_dbg_chk_gen(ubi)) return 0; ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS); @@ -2090,7 +2089,7 @@ out_free: static int self_check_in_wl_tree(const struct ubi_device *ubi, struct ubi_wl_entry *e, struct rb_root *root) { - if (!ubi->dbg->chk_gen) + if (!ubi_dbg_chk_gen(ubi)) return 0; if (in_wl_tree(e, root)) @@ -2116,7 +2115,7 @@ static int self_check_in_pq(const struct ubi_device *ubi, struct ubi_wl_entry *p; int i; - if (!ubi->dbg->chk_gen) + if (!ubi_dbg_chk_gen(ubi)) return 0; for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index ef2cb2418535..b7d45f367d4a 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4431,8 +4431,6 @@ static void bond_uninit(struct net_device *bond_dev) list_del(&bond->bond_list); - bond_work_cancel_all(bond); - bond_debug_unregister(bond); __hw_addr_flush(&bond->mc_list); diff --git a/drivers/net/can/sja1000/sja1000_of_platform.c b/drivers/net/can/sja1000/sja1000_of_platform.c index 0f5917000aa2..6433b81256cd 100644 --- a/drivers/net/can/sja1000/sja1000_of_platform.c +++ b/drivers/net/can/sja1000/sja1000_of_platform.c @@ -121,7 +121,7 @@ static int sja1000_ofp_probe(struct platform_device *ofdev) } irq = irq_of_parse_and_map(np, 0); - if (irq == NO_IRQ) { + if (irq == 0) { dev_err(&ofdev->dev, "no irq found\n"); err = -ENODEV; goto exit_unmap_mem; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 378988b5709a..6db997c78a5f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -35,6 +35,8 @@ #ifndef __CXGB4_H__ #define __CXGB4_H__ +#include "t4_hw.h" + #include <linux/bitops.h> #include <linux/cache.h> #include <linux/interrupt.h> @@ -212,6 +214,8 @@ struct tp_err_stats { struct tp_params { unsigned int ntxchan; /* # of Tx channels */ unsigned int tre; /* log2 of core clocks per TP tick */ + unsigned short tx_modq_map; /* TX modulation scheduler queue to */ + /* channel map */ uint32_t dack_re; /* DACK timer resolution */ unsigned short tx_modq[NCHAN]; /* channel to modulation queue map */ @@ -526,6 +530,7 @@ struct adapter { struct net_device *port[MAX_NPORTS]; u8 chan_map[NCHAN]; /* channel -> port map */ + u32 filter_mode; unsigned int l2t_start; unsigned int l2t_end; struct l2t_data *l2t; @@ -545,6 +550,129 @@ struct adapter { spinlock_t stats_lock; }; +/* Defined bit width of user definable filter tuples + */ +#define ETHTYPE_BITWIDTH 16 +#define FRAG_BITWIDTH 1 +#define MACIDX_BITWIDTH 9 +#define FCOE_BITWIDTH 1 +#define IPORT_BITWIDTH 3 +#define MATCHTYPE_BITWIDTH 3 +#define PROTO_BITWIDTH 8 +#define TOS_BITWIDTH 8 +#define PF_BITWIDTH 8 +#define VF_BITWIDTH 8 +#define IVLAN_BITWIDTH 16 +#define OVLAN_BITWIDTH 16 + +/* Filter matching rules. These consist of a set of ingress packet field + * (value, mask) tuples. The associated ingress packet field matches the + * tuple when ((field & mask) == value). (Thus a wildcard "don't care" field + * rule can be constructed by specifying a tuple of (0, 0).) 
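The (value, mask) convention spelled out in the comment above is easy to demonstrate in isolation. A toy userspace sketch (no driver code or names involved):

#include <stdint.h>
#include <stdio.h>

struct rule { uint32_t value, mask; };

/* A field matches when (field & mask) == value; the (0, 0) tuple
 * therefore matches everything, i.e. acts as a wildcard. */
static int field_matches(uint32_t field, struct rule r)
{
	return (field & r.mask) == r.value;
}

int main(void)
{
	struct rule subnet   = { 0x0a000000, 0xff000000 };	/* 10.0.0.0/8 */
	struct rule wildcard = { 0, 0 };

	printf("%d %d %d\n",
	       field_matches(0x0a010203, subnet),	/* 10.1.2.3    -> 1 */
	       field_matches(0xc0a80001, subnet),	/* 192.168.0.1 -> 0 */
	       field_matches(0xc0a80001, wildcard));	/* always      -> 1 */
	return 0;
}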
A filter rule + * matches an ingress packet when all of the individual field + * matching rules are true. + * + * Partial field masks are always valid, however, while it may be easy to + * understand their meanings for some fields (e.g. IP address to match a + * subnet), for others making sensible partial masks is less intuitive (e.g. + * MPS match type) ... + * + * Most of the following data structures are modeled on T4 capabilities. + * Drivers for earlier chips use the subsets which make sense for those chips. + * We really need to come up with a hardware-independent mechanism to + * represent hardware filter capabilities ... + */ +struct ch_filter_tuple { + /* Compressed header matching field rules. The TP_VLAN_PRI_MAP + * register selects which of these fields will participate in the + * filter match rules -- up to a maximum of 36 bits. Because + * TP_VLAN_PRI_MAP is a global register, all filters must use the same + * set of fields. + */ + uint32_t ethtype:ETHTYPE_BITWIDTH; /* Ethernet type */ + uint32_t frag:FRAG_BITWIDTH; /* IP fragmentation header */ + uint32_t ivlan_vld:1; /* inner VLAN valid */ + uint32_t ovlan_vld:1; /* outer VLAN valid */ + uint32_t pfvf_vld:1; /* PF/VF valid */ + uint32_t macidx:MACIDX_BITWIDTH; /* exact match MAC index */ + uint32_t fcoe:FCOE_BITWIDTH; /* FCoE packet */ + uint32_t iport:IPORT_BITWIDTH; /* ingress port */ + uint32_t matchtype:MATCHTYPE_BITWIDTH; /* MPS match type */ + uint32_t proto:PROTO_BITWIDTH; /* protocol type */ + uint32_t tos:TOS_BITWIDTH; /* TOS/Traffic Type */ + uint32_t pf:PF_BITWIDTH; /* PCI-E PF ID */ + uint32_t vf:VF_BITWIDTH; /* PCI-E VF ID */ + uint32_t ivlan:IVLAN_BITWIDTH; /* inner VLAN */ + uint32_t ovlan:OVLAN_BITWIDTH; /* outer VLAN */ + + /* Uncompressed header matching field rules. These are always + * available for field rules. + */ + uint8_t lip[16]; /* local IP address (IPv4 in [3:0]) */ + uint8_t fip[16]; /* foreign IP address (IPv4 in [3:0]) */ + uint16_t lport; /* local port */ + uint16_t fport; /* foreign port */ +}; + +/* A filter ioctl command. + */ +struct ch_filter_specification { + /* Administrative fields for filter. + */ + uint32_t hitcnts:1; /* count filter hits in TCB */ + uint32_t prio:1; /* filter has priority over active/server */ + + /* Fundamental filter typing. This is the one element of filter + * matching that doesn't exist as a (value, mask) tuple. + */ + uint32_t type:1; /* 0 => IPv4, 1 => IPv6 */ + + /* Packet dispatch information. Ingress packets which match the + * filter rules will be dropped, passed to the host or switched back + * out as egress packets. + */ + uint32_t action:2; /* drop, pass, switch */ + + uint32_t rpttid:1; /* report TID in RSS hash field */ + + uint32_t dirsteer:1; /* 0 => RSS, 1 => steer to iq */ + uint32_t iq:10; /* ingress queue */ + + uint32_t maskhash:1; /* dirsteer=0: store RSS hash in TCB */ + uint32_t dirsteerhash:1;/* dirsteer=1: 0 => TCB contains RSS hash */ + /* 1 => TCB contains IQ ID */ + + /* Switch proxy/rewrite fields. An ingress packet which matches a + * filter with "switch" set will be looped back out as an egress + * packet -- potentially with some Ethernet header rewriting.
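The 2-bit action field in this specification encodes the drop/pass/switch decision just described (the FILTER_PASS, FILTER_DROP and FILTER_SWITCH constants appear a little further down). A hypothetical, self-contained illustration of dispatching on such a field -- the names here are invented, only the shape mirrors the driver's:

#include <stdio.h>

enum action { ACT_PASS = 0, ACT_DROP, ACT_SWITCH };

struct spec { unsigned int action:2; };	/* 2 bits: room for 4 actions */

static const char *dispatch(struct spec s)
{
	switch (s.action) {
	case ACT_DROP:   return "drop packet";
	case ACT_SWITCH: return "loop back out as egress";
	default:         return "pass to host";
	}
}

int main(void)
{
	struct spec s = { .action = ACT_SWITCH };

	puts(dispatch(s));
	return 0;
}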
+ */ + uint32_t eport:2; /* egress port to switch packet out */ + uint32_t newdmac:1; /* rewrite destination MAC address */ + uint32_t newsmac:1; /* rewrite source MAC address */ + uint32_t newvlan:2; /* rewrite VLAN Tag */ + uint8_t dmac[ETH_ALEN]; /* new destination MAC address */ + uint8_t smac[ETH_ALEN]; /* new source MAC address */ + uint16_t vlan; /* VLAN Tag to insert */ + + /* Filter rule value/mask pairs. + */ + struct ch_filter_tuple val; + struct ch_filter_tuple mask; +}; + +enum { + FILTER_PASS = 0, /* default */ + FILTER_DROP, + FILTER_SWITCH +}; + +enum { + VLAN_NOCHANGE = 0, /* default */ + VLAN_REMOVE, + VLAN_INSERT, + VLAN_REWRITE +}; + static inline u32 t4_read_reg(struct adapter *adap, u32 reg_addr) { return readl(adap->regs + reg_addr); @@ -701,6 +829,12 @@ static inline int t4_wr_mbox_ns(struct adapter *adap, int mbox, const void *cmd, void t4_write_indirect(struct adapter *adap, unsigned int addr_reg, unsigned int data_reg, const u32 *vals, unsigned int nregs, unsigned int start_idx); +void t4_read_indirect(struct adapter *adap, unsigned int addr_reg, + unsigned int data_reg, u32 *vals, unsigned int nregs, + unsigned int start_idx); + +struct fw_filter_wr; + void t4_intr_enable(struct adapter *adapter); void t4_intr_disable(struct adapter *adapter); int t4_slow_intr_handler(struct adapter *adapter); @@ -737,6 +871,8 @@ void t4_tp_get_tcp_stats(struct adapter *adap, struct tp_tcp_stats *v4, void t4_load_mtus(struct adapter *adap, const unsigned short *mtus, const unsigned short *alpha, const unsigned short *beta); +void t4_mk_filtdelwr(unsigned int ftid, struct fw_filter_wr *wr, int qid); + void t4_wol_magic_enable(struct adapter *adap, unsigned int port, const u8 *addr); int t4_wol_pat_enable(struct adapter *adap, unsigned int port, unsigned int map, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index a27b4ae20f43..f0718e1a8369 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -175,6 +175,30 @@ enum { MIN_FL_ENTRIES = 16 }; +/* Host shadow copy of ingress filter entry. This is in host native format + * and doesn't match the ordering or bit order, etc. of the hardware of the + * firmware command. The use of bit-field structure elements is purely to + * remind ourselves of the field size limitations and save memory in the case + * where the filter table is large. + */ +struct filter_entry { + /* Administrative fields for filter. + */ + u32 valid:1; /* filter allocated and valid */ + u32 locked:1; /* filter is administratively locked */ + + u32 pending:1; /* filter action is pending firmware reply */ + u32 smtidx:8; /* Source MAC Table index for smac */ + struct l2t_entry *l2t; /* Layer Two Table entry for dmac */ + + /* The filter itself. Most of this is a straight copy of information + * provided by the extended ioctl(). Some fields are translated to + * internal forms -- for instance the Ingress Queue ID passed in from + * the ioctl() is translated into the Absolute Ingress Queue ID. 
+ */ + struct ch_filter_specification fs; +}; + +#define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \ NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\ NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR) @@ -325,6 +349,9 @@ enum { static unsigned int tp_vlan_pri_map = TP_VLAN_PRI_MAP_DEFAULT; +module_param(tp_vlan_pri_map, uint, 0644); +MODULE_PARM_DESC(tp_vlan_pri_map, "global compressed filter configuration"); + static struct dentry *cxgb4_debugfs_root; static LIST_HEAD(adapter_list); @@ -506,8 +533,67 @@ static int link_start(struct net_device *dev) return ret; } -/* - * Response queue handler for the FW event queue. +/* Clear a filter and release any of its resources that we own. This also + * clears the filter's "pending" status. + */ +static void clear_filter(struct adapter *adap, struct filter_entry *f) +{ + /* If the new or old filter has loopback rewriting rules then we'll + * need to free any existing Layer Two Table (L2T) entries of the old + * filter rule. The firmware will handle freeing up any Source MAC + * Table (SMT) entries used for rewriting Source MAC Addresses in + * loopback rules. + */ + if (f->l2t) + cxgb4_l2t_release(f->l2t); + + /* The zeroing of the filter rule below clears the filter valid, + * pending, locked flags, l2t pointer, etc. so it's all we need for + * this operation. + */ + memset(f, 0, sizeof(*f)); +} + +/* Handle a filter write/deletion reply. + */ +static void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl) +{ + unsigned int idx = GET_TID(rpl); + unsigned int nidx = idx - adap->tids.ftid_base; + unsigned int ret; + struct filter_entry *f; + + if (idx >= adap->tids.ftid_base && nidx < + (adap->tids.nftids + adap->tids.nsftids)) { + idx = nidx; + ret = GET_TCB_COOKIE(rpl->cookie); + f = &adap->tids.ftid_tab[idx]; + + if (ret == FW_FILTER_WR_FLT_DELETED) { + /* Clear the filter when we get confirmation from the + * hardware that the filter has been deleted. + */ + clear_filter(adap, f); + } else if (ret == FW_FILTER_WR_SMT_TBL_FULL) { + dev_err(adap->pdev_dev, "filter %u setup failed due to full SMT\n", + idx); + clear_filter(adap, f); + } else if (ret == FW_FILTER_WR_FLT_ADDED) { + f->smtidx = (be64_to_cpu(rpl->oldval) >> 24) & 0xff; + f->pending = 0; /* asynchronous setup completed */ + f->valid = 1; + } else { + /* Something went wrong. Issue a warning about the + * problem and clear everything out. + */ + dev_err(adap->pdev_dev, "filter %u setup failed with error %u\n", + idx, ret); + clear_filter(adap, f); + } + } +} + +/* Response queue handler for the FW event queue. */ static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp, const struct pkt_gl *gl) @@ -542,6 +628,10 @@ static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp, const struct cpl_l2t_write_rpl *p = (void *)rsp; do_l2t_write_rpl(q->adap, p); + } else if (opcode == CPL_SET_TCB_RPL) { + const struct cpl_set_tcb_rpl *p = (void *)rsp; + + filter_rpl(q->adap, p); } else dev_err(q->adap->pdev_dev, "unexpected CPL %#x on FW event queue\n", opcode); @@ -983,6 +1073,148 @@ static void t4_free_mem(void *addr) kfree(addr); } +/* Send a Work Request to write the filter at a specified index. We construct + * a Firmware Filter Work Request to have the work done and put the indicated + * filter into "pending" mode which will prevent any further actions against + * it till we get a reply from the firmware on the completion status of the + * request.
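The "pending" handshake described here pairs with filter_rpl() above: the entry is flagged before the request is shipped, further writes are refused while the flag is set, and the asynchronous reply clears it. A hedged standalone sketch of that state machine (not driver code):

#include <stdio.h>

struct entry { int pending, valid; };

static int start_write(struct entry *e)
{
	if (e->pending)
		return -1;	/* caller must wait for the reply */
	e->pending = 1;		/* ... ship the work request here ... */
	return 0;
}

/* What the reply handler does once firmware answers. */
static void on_reply(struct entry *e, int ok)
{
	e->pending = 0;
	e->valid = ok;
}

int main(void)
{
	struct entry e = { 0, 0 };

	start_write(&e);
	printf("write while pending: %d\n", start_write(&e));	/* -1 */
	on_reply(&e, 1);
	printf("valid=%d pending=%d\n", e.valid, e.pending);
	return 0;
}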
+ */ +static int set_filter_wr(struct adapter *adapter, int fidx) +{ + struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; + struct sk_buff *skb; + struct fw_filter_wr *fwr; + unsigned int ftid; + + /* If the new filter requires loopback Destination MAC and/or VLAN + * rewriting then we need to allocate a Layer 2 Table (L2T) entry for + * the filter. + */ + if (f->fs.newdmac || f->fs.newvlan) { + /* allocate L2T entry for new filter */ + f->l2t = t4_l2t_alloc_switching(adapter->l2t); + if (f->l2t == NULL) + return -EAGAIN; + if (t4_l2t_set_switching(adapter, f->l2t, f->fs.vlan, + f->fs.eport, f->fs.dmac)) { + cxgb4_l2t_release(f->l2t); + f->l2t = NULL; + return -ENOMEM; + } + } + + ftid = adapter->tids.ftid_base + fidx; + + skb = alloc_skb(sizeof(*fwr), GFP_KERNEL | __GFP_NOFAIL); + fwr = (struct fw_filter_wr *)__skb_put(skb, sizeof(*fwr)); + memset(fwr, 0, sizeof(*fwr)); + + /* It would be nice to put most of the following in t4_hw.c but most + * of the work is translating the cxgbtool ch_filter_specification + * into the Work Request and the definition of that structure is + * currently in cxgbtool.h which isn't appropriate to pull into the + * common code. We may eventually try to come up with a more neutral + * filter specification structure but for now it's easiest to simply + * put this fairly direct code in line ... + */ + fwr->op_pkd = htonl(FW_WR_OP(FW_FILTER_WR)); + fwr->len16_pkd = htonl(FW_WR_LEN16(sizeof(*fwr)/16)); + fwr->tid_to_iq = + htonl(V_FW_FILTER_WR_TID(ftid) | + V_FW_FILTER_WR_RQTYPE(f->fs.type) | + V_FW_FILTER_WR_NOREPLY(0) | + V_FW_FILTER_WR_IQ(f->fs.iq)); + fwr->del_filter_to_l2tix = + htonl(V_FW_FILTER_WR_RPTTID(f->fs.rpttid) | + V_FW_FILTER_WR_DROP(f->fs.action == FILTER_DROP) | + V_FW_FILTER_WR_DIRSTEER(f->fs.dirsteer) | + V_FW_FILTER_WR_MASKHASH(f->fs.maskhash) | + V_FW_FILTER_WR_DIRSTEERHASH(f->fs.dirsteerhash) | + V_FW_FILTER_WR_LPBK(f->fs.action == FILTER_SWITCH) | + V_FW_FILTER_WR_DMAC(f->fs.newdmac) | + V_FW_FILTER_WR_SMAC(f->fs.newsmac) | + V_FW_FILTER_WR_INSVLAN(f->fs.newvlan == VLAN_INSERT || + f->fs.newvlan == VLAN_REWRITE) | + V_FW_FILTER_WR_RMVLAN(f->fs.newvlan == VLAN_REMOVE || + f->fs.newvlan == VLAN_REWRITE) | + V_FW_FILTER_WR_HITCNTS(f->fs.hitcnts) | + V_FW_FILTER_WR_TXCHAN(f->fs.eport) | + V_FW_FILTER_WR_PRIO(f->fs.prio) | + V_FW_FILTER_WR_L2TIX(f->l2t ? 
f->l2t->idx : 0)); + fwr->ethtype = htons(f->fs.val.ethtype); + fwr->ethtypem = htons(f->fs.mask.ethtype); + fwr->frag_to_ovlan_vldm = + (V_FW_FILTER_WR_FRAG(f->fs.val.frag) | + V_FW_FILTER_WR_FRAGM(f->fs.mask.frag) | + V_FW_FILTER_WR_IVLAN_VLD(f->fs.val.ivlan_vld) | + V_FW_FILTER_WR_OVLAN_VLD(f->fs.val.ovlan_vld) | + V_FW_FILTER_WR_IVLAN_VLDM(f->fs.mask.ivlan_vld) | + V_FW_FILTER_WR_OVLAN_VLDM(f->fs.mask.ovlan_vld)); + fwr->smac_sel = 0; + fwr->rx_chan_rx_rpl_iq = + htons(V_FW_FILTER_WR_RX_CHAN(0) | + V_FW_FILTER_WR_RX_RPL_IQ(adapter->sge.fw_evtq.abs_id)); + fwr->maci_to_matchtypem = + htonl(V_FW_FILTER_WR_MACI(f->fs.val.macidx) | + V_FW_FILTER_WR_MACIM(f->fs.mask.macidx) | + V_FW_FILTER_WR_FCOE(f->fs.val.fcoe) | + V_FW_FILTER_WR_FCOEM(f->fs.mask.fcoe) | + V_FW_FILTER_WR_PORT(f->fs.val.iport) | + V_FW_FILTER_WR_PORTM(f->fs.mask.iport) | + V_FW_FILTER_WR_MATCHTYPE(f->fs.val.matchtype) | + V_FW_FILTER_WR_MATCHTYPEM(f->fs.mask.matchtype)); + fwr->ptcl = f->fs.val.proto; + fwr->ptclm = f->fs.mask.proto; + fwr->ttyp = f->fs.val.tos; + fwr->ttypm = f->fs.mask.tos; + fwr->ivlan = htons(f->fs.val.ivlan); + fwr->ivlanm = htons(f->fs.mask.ivlan); + fwr->ovlan = htons(f->fs.val.ovlan); + fwr->ovlanm = htons(f->fs.mask.ovlan); + memcpy(fwr->lip, f->fs.val.lip, sizeof(fwr->lip)); + memcpy(fwr->lipm, f->fs.mask.lip, sizeof(fwr->lipm)); + memcpy(fwr->fip, f->fs.val.fip, sizeof(fwr->fip)); + memcpy(fwr->fipm, f->fs.mask.fip, sizeof(fwr->fipm)); + fwr->lp = htons(f->fs.val.lport); + fwr->lpm = htons(f->fs.mask.lport); + fwr->fp = htons(f->fs.val.fport); + fwr->fpm = htons(f->fs.mask.fport); + if (f->fs.newsmac) + memcpy(fwr->sma, f->fs.smac, sizeof(fwr->sma)); + + /* Mark the filter as "pending" and ship off the Filter Work Request. + * When we get the Work Request Reply we'll clear the pending status. + */ + f->pending = 1; + set_wr_txq(skb, CPL_PRIORITY_CONTROL, f->fs.val.iport & 0x3); + t4_ofld_send(adapter, skb); + return 0; +} + +/* Delete the filter at a specified index. + */ +static int del_filter_wr(struct adapter *adapter, int fidx) +{ + struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; + struct sk_buff *skb; + struct fw_filter_wr *fwr; + unsigned int len, ftid; + + len = sizeof(*fwr); + ftid = adapter->tids.ftid_base + fidx; + + skb = alloc_skb(len, GFP_KERNEL | __GFP_NOFAIL); + fwr = (struct fw_filter_wr *)__skb_put(skb, len); + t4_mk_filtdelwr(ftid, fwr, adapter->sge.fw_evtq.abs_id); + + /* Mark the filter as "pending" and ship off the Filter Work Request. + * When we get the Work Request Reply we'll clear the pending status. + */ + f->pending = 1; + t4_mgmt_tx(adapter, skb); + return 0; +} + static inline int is_offload(const struct adapter *adap) { return adap->params.offload; @@ -2195,7 +2427,7 @@ int cxgb4_alloc_atid(struct tid_info *t, void *data) if (t->afree) { union aopen_entry *p = t->afree; - atid = p - t->atid_tab; + atid = (p - t->atid_tab) + t->atid_base; t->afree = p->next; p->data = data; t->atids_in_use++; @@ -2210,7 +2442,7 @@ EXPORT_SYMBOL(cxgb4_alloc_atid); */ void cxgb4_free_atid(struct tid_info *t, unsigned int atid) { - union aopen_entry *p = &t->atid_tab[atid]; + union aopen_entry *p = &t->atid_tab[atid - t->atid_base]; spin_lock_bh(&t->atid_lock); p->next = t->afree; @@ -2249,8 +2481,34 @@ int cxgb4_alloc_stid(struct tid_info *t, int family, void *data) } EXPORT_SYMBOL(cxgb4_alloc_stid); -/* - * Release a server TID. +/* Allocate a server filter TID and set it to the supplied value. 
+ */ +int cxgb4_alloc_sftid(struct tid_info *t, int family, void *data) +{ + int stid; + + spin_lock_bh(&t->stid_lock); + if (family == PF_INET) { + stid = find_next_zero_bit(t->stid_bmap, + t->nstids + t->nsftids, t->nstids); + if (stid < (t->nstids + t->nsftids)) + __set_bit(stid, t->stid_bmap); + else + stid = -1; + } else { + stid = -1; + } + if (stid >= 0) { + t->stid_tab[stid].data = data; + stid += t->stid_base; + t->stids_in_use++; + } + spin_unlock_bh(&t->stid_lock); + return stid; +} +EXPORT_SYMBOL(cxgb4_alloc_sftid); + +/* Release a server TID. */ void cxgb4_free_stid(struct tid_info *t, unsigned int stid, int family) { @@ -2362,18 +2620,26 @@ EXPORT_SYMBOL(cxgb4_remove_tid); static int tid_init(struct tid_info *t) { size_t size; + unsigned int stid_bmap_size; unsigned int natids = t->natids; - size = t->ntids * sizeof(*t->tid_tab) + natids * sizeof(*t->atid_tab) + + stid_bmap_size = BITS_TO_LONGS(t->nstids + t->nsftids); + size = t->ntids * sizeof(*t->tid_tab) + + natids * sizeof(*t->atid_tab) + t->nstids * sizeof(*t->stid_tab) + - BITS_TO_LONGS(t->nstids) * sizeof(long); + t->nsftids * sizeof(*t->stid_tab) + + stid_bmap_size * sizeof(long) + + t->nftids * sizeof(*t->ftid_tab) + + t->nsftids * sizeof(*t->ftid_tab); + t->tid_tab = t4_alloc_mem(size); if (!t->tid_tab) return -ENOMEM; t->atid_tab = (union aopen_entry *)&t->tid_tab[t->ntids]; t->stid_tab = (struct serv_entry *)&t->atid_tab[natids]; - t->stid_bmap = (unsigned long *)&t->stid_tab[t->nstids]; + t->stid_bmap = (unsigned long *)&t->stid_tab[t->nstids + t->nsftids]; + t->ftid_tab = (struct filter_entry *)&t->stid_bmap[stid_bmap_size]; spin_lock_init(&t->stid_lock); spin_lock_init(&t->atid_lock); @@ -2388,7 +2654,7 @@ static int tid_init(struct tid_info *t) t->atid_tab[natids - 1].next = &t->atid_tab[natids]; t->afree = t->atid_tab; } - bitmap_zero(t->stid_bmap, t->nstids); + bitmap_zero(t->stid_bmap, t->nstids + t->nsftids); return 0; } @@ -2404,7 +2670,8 @@ static int tid_init(struct tid_info *t) * Returns <0 on error and one of the %NET_XMIT_* values on success. */ int cxgb4_create_server(const struct net_device *dev, unsigned int stid, - __be32 sip, __be16 sport, unsigned int queue) + __be32 sip, __be16 sport, __be16 vlan, + unsigned int queue) { unsigned int chan; struct sk_buff *skb; @@ -2750,6 +3017,7 @@ static void uld_attach(struct adapter *adap, unsigned int uld) { void *handle; struct cxgb4_lld_info lli; + unsigned short i; lli.pdev = adap->pdev; lli.l2t = adap->l2t; @@ -2776,10 +3044,16 @@ static void uld_attach(struct adapter *adap, unsigned int uld) lli.ucq_density = 1 << QUEUESPERPAGEPF0_GET( t4_read_reg(adap, SGE_INGRESS_QUEUES_PER_PAGE_PF) >> (adap->fn * 4)); + lli.filt_mode = adap->filter_mode; + /* MODQ_REQ_MAP sets queues 0-3 to chan 0-3 */ + for (i = 0; i < NCHAN; i++) + lli.tx_modq[i] = i; lli.gts_reg = adap->regs + MYPF_REG(SGE_PF_GTS); lli.db_reg = adap->regs + MYPF_REG(SGE_PF_KDOORBELL); lli.fw_vers = adap->params.fw_vers; lli.dbfifo_int_thresh = dbfifo_int_thresh; + lli.sge_pktshift = adap->sge.pktshift; + lli.enable_fw_ofld_conn = adap->flags & FW_OFLD_CONN; handle = ulds[uld].add(&lli); if (IS_ERR(handle)) { @@ -2999,6 +3273,126 @@ static int cxgb_close(struct net_device *dev) return t4_enable_vi(adapter, adapter->fn, pi->viid, false, false); } +/* Return an error number if the indicated filter isn't writable ... 
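cxgb4_alloc_sftid() above carves server-filter IDs out of the top part of the stid bitmap via find_next_zero_bit(), then rebases the index. The same first-fit pattern in plain userspace C (a byte-per-slot array stands in for the kernel bitmap; all names are illustrative):

#include <stdio.h>

#define NIDS 64
static unsigned char bmap[NIDS];	/* 0 = free, 1 = allocated */

/* Scan [lo, hi) for a free slot, claim it, return index + base. */
static int alloc_id(int lo, int hi, int base)
{
	int i;

	for (i = lo; i < hi; i++)
		if (!bmap[i]) {
			bmap[i] = 1;
			return i + base;
		}
	return -1;	/* range exhausted */
}

int main(void)
{
	/* mimic filter IDs living above the regular server IDs */
	printf("%d\n", alloc_id(32, NIDS, 1000));	/* 1032 */
	printf("%d\n", alloc_id(32, NIDS, 1000));	/* 1033 */
	return 0;
}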
+ */ +static int writable_filter(struct filter_entry *f) +{ + if (f->locked) + return -EPERM; + if (f->pending) + return -EBUSY; + + return 0; +} + +/* Delete the filter at the specified index (if valid). This checks for all + * the common problems with doing so, such as the filter being locked or + * currently pending in another operation. + */ +static int delete_filter(struct adapter *adapter, unsigned int fidx) +{ + struct filter_entry *f; + int ret; + + if (fidx >= adapter->tids.nftids + adapter->tids.nsftids) + return -EINVAL; + + f = &adapter->tids.ftid_tab[fidx]; + ret = writable_filter(f); + if (ret) + return ret; + if (f->valid) + return del_filter_wr(adapter, fidx); + + return 0; +} + +int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid, + __be32 sip, __be16 sport, __be16 vlan, + unsigned int queue, unsigned char port, unsigned char mask) +{ + int ret; + struct filter_entry *f; + struct adapter *adap; + int i; + u8 *val; + + adap = netdev2adap(dev); + + /* Adjust stid to correct filter index */ + stid -= adap->tids.nstids; + stid += adap->tids.nftids; + + /* Check to make sure the filter requested is writable ... + */ + f = &adap->tids.ftid_tab[stid]; + ret = writable_filter(f); + if (ret) + return ret; + + /* Clear out any old resources being used by the filter before + * we start constructing the new filter. + */ + if (f->valid) + clear_filter(adap, f); + + /* Clear out filter specifications */ + memset(&f->fs, 0, sizeof(struct ch_filter_specification)); + f->fs.val.lport = cpu_to_be16(sport); + f->fs.mask.lport = ~0; + val = (u8 *)&sip; + if ((val[0] | val[1] | val[2] | val[3]) != 0) { + for (i = 0; i < 4; i++) { + f->fs.val.lip[i] = val[i]; + f->fs.mask.lip[i] = ~0; + } + if (adap->filter_mode & F_PORT) { + f->fs.val.iport = port; + f->fs.mask.iport = mask; + } + } + + f->fs.dirsteer = 1; + f->fs.iq = queue; + /* Mark filter as locked */ + f->locked = 1; + f->fs.rpttid = 1; + + ret = set_filter_wr(adap, stid); + if (ret) { + clear_filter(adap, f); + return ret; + } + + return 0; +} +EXPORT_SYMBOL(cxgb4_create_server_filter); + +int cxgb4_remove_server_filter(const struct net_device *dev, unsigned int stid, + unsigned int queue, bool ipv6) +{ + int ret; + struct filter_entry *f; + struct adapter *adap; + + adap = netdev2adap(dev); + + /* Adjust stid to correct filter index */ + stid -= adap->tids.nstids; + stid += adap->tids.nftids; + + f = &adap->tids.ftid_tab[stid]; + /* Unlock the filter */ + f->locked = 0; + + ret = delete_filter(adap, stid); + if (ret) + return ret; + + return 0; +} +EXPORT_SYMBOL(cxgb4_remove_server_filter); + static struct rtnl_link_stats64 *cxgb_get_stats(struct net_device *dev, struct rtnl_link_stats64 *ns) { @@ -3245,6 +3639,34 @@ static int adap_init1(struct adapter *adap, struct fw_caps_config_cmd *c) v = t4_read_reg(adap, TP_PIO_DATA); t4_write_reg(adap, TP_PIO_DATA, v & ~CSUM_HAS_PSEUDO_HDR); + /* first 4 Tx modulation queues point to consecutive Tx channels */ + adap->params.tp.tx_modq_map = 0xE4; + t4_write_reg(adap, A_TP_TX_MOD_QUEUE_REQ_MAP, + V_TX_MOD_QUEUE_REQ_MAP(adap->params.tp.tx_modq_map)); + + /* associate each Tx modulation queue with consecutive Tx channels */ + v = 0x84218421; + t4_write_indirect(adap, TP_PIO_ADDR, TP_PIO_DATA, + &v, 1, A_TP_TX_SCHED_HDR); + t4_write_indirect(adap, TP_PIO_ADDR, TP_PIO_DATA, + &v, 1, A_TP_TX_SCHED_FIFO); + t4_write_indirect(adap, TP_PIO_ADDR, TP_PIO_DATA, + &v, 1, A_TP_TX_SCHED_PCMD); + +#define T4_TX_MODQ_10G_WEIGHT_DEFAULT 16 /* in KB units */ + if (is_offload(adap)) { +
t4_write_reg(adap, A_TP_TX_MOD_QUEUE_WEIGHT0, + V_TX_MODQ_WEIGHT0(T4_TX_MODQ_10G_WEIGHT_DEFAULT) | + V_TX_MODQ_WEIGHT1(T4_TX_MODQ_10G_WEIGHT_DEFAULT) | + V_TX_MODQ_WEIGHT2(T4_TX_MODQ_10G_WEIGHT_DEFAULT) | + V_TX_MODQ_WEIGHT3(T4_TX_MODQ_10G_WEIGHT_DEFAULT)); + t4_write_reg(adap, A_TP_TX_MOD_CHANNEL_WEIGHT, + V_TX_MODQ_WEIGHT0(T4_TX_MODQ_10G_WEIGHT_DEFAULT) | + V_TX_MODQ_WEIGHT1(T4_TX_MODQ_10G_WEIGHT_DEFAULT) | + V_TX_MODQ_WEIGHT2(T4_TX_MODQ_10G_WEIGHT_DEFAULT) | + V_TX_MODQ_WEIGHT3(T4_TX_MODQ_10G_WEIGHT_DEFAULT)); + } + /* get basic stuff going */ return t4_early_init(adap, adap->fn); } @@ -4035,6 +4457,10 @@ static int adap_init0(struct adapter *adap) for (j = 0; j < NCHAN; j++) adap->params.tp.tx_modq[j] = j; + t4_read_indirect(adap, TP_PIO_ADDR, TP_PIO_DATA, + &adap->filter_mode, 1, + TP_VLAN_PRI_MAP); + adap->flags |= FW_OK; return 0; @@ -4661,6 +5087,17 @@ static void remove_one(struct pci_dev *pdev) if (adapter->debugfs_root) debugfs_remove_recursive(adapter->debugfs_root); + /* If we allocated filters, free up state associated with any + * valid filters ... + */ + if (adapter->tids.ftid_tab) { + struct filter_entry *f = &adapter->tids.ftid_tab[0]; + for (i = 0; i < (adapter->tids.nftids + + adapter->tids.nsftids); i++, f++) + if (f->valid) + clear_filter(adapter, f); + } + if (adapter->flags & FULL_INIT_DONE) cxgb_down(adapter); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index 39bec73ff87c..e2bbc7f3e2de 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -38,6 +38,7 @@ #include <linux/cache.h> #include <linux/spinlock.h> #include <linux/skbuff.h> +#include <linux/inetdevice.h> #include <linux/atomic.h> /* CPL message priority levels */ @@ -97,7 +98,9 @@ struct tid_info { union aopen_entry *atid_tab; unsigned int natids; + unsigned int atid_base; + struct filter_entry *ftid_tab; unsigned int nftids; unsigned int ftid_base; unsigned int aftid_base; @@ -129,7 +132,7 @@ static inline void *lookup_atid(const struct tid_info *t, unsigned int atid) static inline void *lookup_stid(const struct tid_info *t, unsigned int stid) { stid -= t->stid_base; - return stid < t->nstids ? t->stid_tab[stid].data : NULL; + return stid < (t->nstids + t->nsftids) ? 
t->stid_tab[stid].data : NULL; } static inline void cxgb4_insert_tid(struct tid_info *t, void *data, @@ -141,6 +144,7 @@ static inline void cxgb4_insert_tid(struct tid_info *t, void *data, int cxgb4_alloc_atid(struct tid_info *t, void *data); int cxgb4_alloc_stid(struct tid_info *t, int family, void *data); +int cxgb4_alloc_sftid(struct tid_info *t, int family, void *data); void cxgb4_free_atid(struct tid_info *t, unsigned int atid); void cxgb4_free_stid(struct tid_info *t, unsigned int stid, int family); void cxgb4_remove_tid(struct tid_info *t, unsigned int qid, unsigned int tid); @@ -148,8 +152,14 @@ void cxgb4_remove_tid(struct tid_info *t, unsigned int qid, unsigned int tid); struct in6_addr; int cxgb4_create_server(const struct net_device *dev, unsigned int stid, - __be32 sip, __be16 sport, unsigned int queue); - + __be32 sip, __be16 sport, __be16 vlan, + unsigned int queue); +int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid, + __be32 sip, __be16 sport, __be16 vlan, + unsigned int queue, + unsigned char port, unsigned char mask); +int cxgb4_remove_server_filter(const struct net_device *dev, unsigned int stid, + unsigned int queue, bool ipv6); static inline void set_wr_txq(struct sk_buff *skb, int prio, int queue) { skb_set_queue_mapping(skb, (queue << 1) | prio); @@ -221,9 +231,16 @@ struct cxgb4_lld_info { unsigned int iscsi_iolen; /* iSCSI max I/O length */ unsigned short udb_density; /* # of user DB/page */ unsigned short ucq_density; /* # of user CQs/page */ + unsigned short filt_mode; /* filter optional components */ + unsigned short tx_modq[NCHAN]; /* maps each tx channel to a */ + /* scheduler queue */ void __iomem *gts_reg; /* address of GTS register */ void __iomem *db_reg; /* address of kernel doorbell */ int dbfifo_int_thresh; /* doorbell fifo int threshold */ + unsigned int sge_pktshift; /* Padding between CPL and */ + /* packet data */ + bool enable_fw_ofld_conn; /* Enable connection through fw */ + /* WR */ }; struct cxgb4_uld_info { diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.c b/drivers/net/ethernet/chelsio/cxgb4/l2t.c index 6ac77a62f361..29878098101e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/l2t.c +++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.c @@ -484,6 +484,38 @@ void t4_l2t_update(struct adapter *adap, struct neighbour *neigh) handle_failed_resolution(adap, arpq); } +/* Allocate an L2T entry for use by a switching rule. Such entries need to be + * explicitly freed and while busy they are not on any hash chain, so normal + * address resolution updates do not see them. + */ +struct l2t_entry *t4_l2t_alloc_switching(struct l2t_data *d) +{ + struct l2t_entry *e; + + write_lock_bh(&d->lock); + e = alloc_l2e(d); + if (e) { + spin_lock(&e->lock); /* avoid race with t4_l2t_free */ + e->state = L2T_STATE_SWITCHING; + atomic_set(&e->refcnt, 1); + spin_unlock(&e->lock); + } + write_unlock_bh(&d->lock); + return e; +} + +/* Sets/updates the contents of a switching L2T entry that has been allocated + * with an earlier call to @t4_l2t_alloc_switching.
+ */ +int t4_l2t_set_switching(struct adapter *adap, struct l2t_entry *e, u16 vlan, + u8 port, u8 *eth_addr) +{ + e->vlan = vlan; + e->lport = port; + memcpy(e->dmac, eth_addr, ETH_ALEN); + return write_l2e(adap, e, 0); +} + struct l2t_data *t4_init_l2t(void) { int i; diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.h b/drivers/net/ethernet/chelsio/cxgb4/l2t.h index 02b31d0c6410..108c0f1fce1c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/l2t.h +++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.h @@ -100,6 +100,9 @@ struct l2t_entry *cxgb4_l2t_get(struct l2t_data *d, struct neighbour *neigh, unsigned int priority); void t4_l2t_update(struct adapter *adap, struct neighbour *neigh); +struct l2t_entry *t4_l2t_alloc_switching(struct l2t_data *d); +int t4_l2t_set_switching(struct adapter *adap, struct l2t_entry *e, u16 vlan, + u8 port, u8 *eth_addr); struct l2t_data *t4_init_l2t(void); void do_l2t_write_rpl(struct adapter *p, const struct cpl_l2t_write_rpl *rpl); diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 8d9c7547b070..22f3af5166bf 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -109,7 +109,7 @@ void t4_set_reg_field(struct adapter *adapter, unsigned int addr, u32 mask, * Reads registers that are accessed indirectly through an address/data * register pair. */ -static void t4_read_indirect(struct adapter *adap, unsigned int addr_reg, +void t4_read_indirect(struct adapter *adap, unsigned int addr_reg, unsigned int data_reg, u32 *vals, unsigned int nregs, unsigned int start_idx) { @@ -2268,6 +2268,26 @@ int t4_wol_pat_enable(struct adapter *adap, unsigned int port, unsigned int map, return 0; } +/* t4_mk_filtdelwr - create a delete filter WR + * @ftid: the filter ID + * @wr: the filter work request to populate + * @qid: ingress queue to receive the delete notification + * + * Creates a filter work request to delete the supplied filter. If @qid is + * negative the delete notification is suppressed. 
+ */ +void t4_mk_filtdelwr(unsigned int ftid, struct fw_filter_wr *wr, int qid) +{ + memset(wr, 0, sizeof(*wr)); + wr->op_pkd = htonl(FW_WR_OP(FW_FILTER_WR)); + wr->len16_pkd = htonl(FW_WR_LEN16(sizeof(*wr) / 16)); + wr->tid_to_iq = htonl(V_FW_FILTER_WR_TID(ftid) | + V_FW_FILTER_WR_NOREPLY(qid < 0)); + wr->del_filter_to_l2tix = htonl(F_FW_FILTER_WR_DEL_FILTER); + if (qid >= 0) + wr->rx_chan_rx_rpl_iq = htons(V_FW_FILTER_WR_RX_RPL_IQ(qid)); +} + #define INIT_CMD(var, cmd, rd_wr) do { \ (var).op_to_write = htonl(FW_CMD_OP(FW_##cmd##_CMD) | \ FW_CMD_REQUEST | FW_CMD_##rd_wr); \ diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h index b760808fd6d9..261d17703adc 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h @@ -193,8 +193,24 @@ struct work_request_hdr { __be64 wr_lo; }; +/* wr_hi fields */ +#define S_WR_OP 24 +#define V_WR_OP(x) ((__u64)(x) << S_WR_OP) + #define WR_HDR struct work_request_hdr wr +/* option 0 fields */ +#define S_MSS_IDX 60 +#define M_MSS_IDX 0xF +#define V_MSS_IDX(x) ((__u64)(x) << S_MSS_IDX) +#define G_MSS_IDX(x) (((x) >> S_MSS_IDX) & M_MSS_IDX) + +/* option 2 fields */ +#define S_RSS_QUEUE 0 +#define M_RSS_QUEUE 0x3FF +#define V_RSS_QUEUE(x) ((x) << S_RSS_QUEUE) +#define G_RSS_QUEUE(x) (((x) >> S_RSS_QUEUE) & M_RSS_QUEUE) + struct cpl_pass_open_req { WR_HDR; union opcode_tid ot; @@ -204,12 +220,14 @@ struct cpl_pass_open_req { __be32 peer_ip; __be64 opt0; #define TX_CHAN(x) ((x) << 2) +#define NO_CONG(x) ((x) << 4) #define DELACK(x) ((x) << 5) #define ULP_MODE(x) ((x) << 8) #define RCV_BUFSIZ(x) ((x) << 12) #define DSCP(x) ((x) << 22) #define SMAC_SEL(x) ((u64)(x) << 28) #define L2T_IDX(x) ((u64)(x) << 36) +#define TCAM_BYPASS(x) ((u64)(x) << 48) #define NAGLE(x) ((u64)(x) << 49) #define WND_SCALE(x) ((u64)(x) << 50) #define KEEP_ALIVE(x) ((u64)(x) << 54) @@ -247,8 +265,10 @@ struct cpl_pass_accept_rpl { #define RSS_QUEUE_VALID (1 << 10) #define RX_COALESCE_VALID(x) ((x) << 11) #define RX_COALESCE(x) ((x) << 12) +#define PACE(x) ((x) << 16) #define TX_QUEUE(x) ((x) << 23) #define RX_CHANNEL(x) ((x) << 26) +#define CCTRL_ECN(x) ((x) << 27) #define WND_SCALE_EN(x) ((x) << 28) #define TSTAMPS_EN(x) ((x) << 29) #define SACK_EN(x) ((x) << 30) @@ -292,6 +312,9 @@ struct cpl_pass_establish { union opcode_tid ot; __be32 rsvd; __be32 tos_stid; +#define PASS_OPEN_TID(x) ((x) << 0) +#define PASS_OPEN_TOS(x) ((x) << 24) +#define GET_PASS_OPEN_TID(x) (((x) >> 0) & 0xFFFFFF) #define GET_POPEN_TID(x) ((x) & 0xffffff) #define GET_POPEN_TOS(x) (((x) >> 24) & 0xff) __be16 mac_idx; @@ -332,6 +355,7 @@ struct cpl_set_tcb_field { __be16 word_cookie; #define TCB_WORD(x) ((x) << 0) #define TCB_COOKIE(x) ((x) << 5) +#define GET_TCB_COOKIE(x) (((x) >> 5) & 7) __be64 mask; __be64 val; }; @@ -536,6 +560,37 @@ struct cpl_rx_pkt { __be16 err_vec; }; +/* rx_pkt.l2info fields */ +#define S_RX_ETHHDR_LEN 0 +#define M_RX_ETHHDR_LEN 0x1F +#define V_RX_ETHHDR_LEN(x) ((x) << S_RX_ETHHDR_LEN) +#define G_RX_ETHHDR_LEN(x) (((x) >> S_RX_ETHHDR_LEN) & M_RX_ETHHDR_LEN) + +#define S_RX_MACIDX 8 +#define M_RX_MACIDX 0x1FF +#define V_RX_MACIDX(x) ((x) << S_RX_MACIDX) +#define G_RX_MACIDX(x) (((x) >> S_RX_MACIDX) & M_RX_MACIDX) + +#define S_RXF_SYN 21 +#define V_RXF_SYN(x) ((x) << S_RXF_SYN) +#define F_RXF_SYN V_RXF_SYN(1U) + +#define S_RX_CHAN 28 +#define M_RX_CHAN 0xF +#define V_RX_CHAN(x) ((x) << S_RX_CHAN) +#define G_RX_CHAN(x) (((x) >> S_RX_CHAN) & M_RX_CHAN) + +/* rx_pkt.hdr_len fields */ +#define 
S_RX_TCPHDR_LEN 0 +#define M_RX_TCPHDR_LEN 0x3F +#define V_RX_TCPHDR_LEN(x) ((x) << S_RX_TCPHDR_LEN) +#define G_RX_TCPHDR_LEN(x) (((x) >> S_RX_TCPHDR_LEN) & M_RX_TCPHDR_LEN) + +#define S_RX_IPHDR_LEN 6 +#define M_RX_IPHDR_LEN 0x3FF +#define V_RX_IPHDR_LEN(x) ((x) << S_RX_IPHDR_LEN) +#define G_RX_IPHDR_LEN(x) (((x) >> S_RX_IPHDR_LEN) & M_RX_IPHDR_LEN) + struct cpl_trace_pkt { u8 opcode; u8 intf; @@ -634,6 +689,17 @@ struct cpl_fw6_msg { /* cpl_fw6_msg.type values */ enum { FW6_TYPE_CMD_RPL = 0, + FW6_TYPE_WR_RPL = 1, + FW6_TYPE_CQE = 2, + FW6_TYPE_OFLD_CONNECTION_WR_RPL = 3, +}; + +struct cpl_fw6_msg_ofld_connection_wr_rpl { + __u64 cookie; + __be32 tid; /* or atid in case of active failure */ + __u8 t_state; + __u8 retval; + __u8 rsvd[2]; }; enum { diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h index 75393f5cff41..83ec5f7844ac 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h @@ -1064,4 +1064,41 @@ #define ADDRESS(x) ((x) << ADDRESS_SHIFT) #define XGMAC_PORT_INT_CAUSE 0x10dc + +#define A_TP_TX_MOD_QUEUE_REQ_MAP 0x7e28 + +#define A_TP_TX_MOD_CHANNEL_WEIGHT 0x7e34 + +#define S_TX_MOD_QUEUE_REQ_MAP 0 +#define M_TX_MOD_QUEUE_REQ_MAP 0xffffU +#define V_TX_MOD_QUEUE_REQ_MAP(x) ((x) << S_TX_MOD_QUEUE_REQ_MAP) + +#define A_TP_TX_MOD_QUEUE_WEIGHT0 0x7e30 + +#define S_TX_MODQ_WEIGHT3 24 +#define M_TX_MODQ_WEIGHT3 0xffU +#define V_TX_MODQ_WEIGHT3(x) ((x) << S_TX_MODQ_WEIGHT3) + +#define S_TX_MODQ_WEIGHT2 16 +#define M_TX_MODQ_WEIGHT2 0xffU +#define V_TX_MODQ_WEIGHT2(x) ((x) << S_TX_MODQ_WEIGHT2) + +#define S_TX_MODQ_WEIGHT1 8 +#define M_TX_MODQ_WEIGHT1 0xffU +#define V_TX_MODQ_WEIGHT1(x) ((x) << S_TX_MODQ_WEIGHT1) + +#define S_TX_MODQ_WEIGHT0 0 +#define M_TX_MODQ_WEIGHT0 0xffU +#define V_TX_MODQ_WEIGHT0(x) ((x) << S_TX_MODQ_WEIGHT0) + +#define A_TP_TX_SCHED_HDR 0x23 + +#define A_TP_TX_SCHED_FIFO 0x24 + +#define A_TP_TX_SCHED_PCMD 0x25 + +#define S_PORT 1 +#define V_PORT(x) ((x) << S_PORT) +#define F_PORT V_PORT(1U) + #endif /* __T4_REGS_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index 0abc864cdd3a..a0dcccd846c9 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -35,6 +35,45 @@ #ifndef _T4FW_INTERFACE_H_ #define _T4FW_INTERFACE_H_ +enum fw_retval { + FW_SUCCESS = 0, /* completed successfully */ + FW_EPERM = 1, /* operation not permitted */ + FW_ENOENT = 2, /* no such file or directory */ + FW_EIO = 5, /* input/output error; hw bad */ + FW_ENOEXEC = 8, /* exec format error; inv microcode */ + FW_EAGAIN = 11, /* try again */ + FW_ENOMEM = 12, /* out of memory */ + FW_EFAULT = 14, /* bad address; fw bad */ + FW_EBUSY = 16, /* resource busy */ + FW_EEXIST = 17, /* file exists */ + FW_EINVAL = 22, /* invalid argument */ + FW_ENOSPC = 28, /* no space left on device */ + FW_ENOSYS = 38, /* functionality not implemented */ + FW_EPROTO = 71, /* protocol error */ + FW_EADDRINUSE = 98, /* address already in use */ + FW_EADDRNOTAVAIL = 99, /* cannot assign requested address */ + FW_ENETDOWN = 100, /* network is down */ + FW_ENETUNREACH = 101, /* network is unreachable */ + FW_ENOBUFS = 105, /* no buffer space available */ + FW_ETIMEDOUT = 110, /* timeout */ + FW_EINPROGRESS = 115, /* fw internal */ + FW_SCSI_ABORT_REQUESTED = 128, /* */ + FW_SCSI_ABORT_TIMEDOUT = 129, /* */ + FW_SCSI_ABORTED = 130, /* */ + FW_SCSI_CLOSE_REQUESTED = 131, /* */ + FW_ERR_LINK_DOWN = 132,
/* */ + FW_RDEV_NOT_READY = 133, /* */ + FW_ERR_RDEV_LOST = 134, /* */ + FW_ERR_RDEV_LOGO = 135, /* */ + FW_FCOE_NO_XCHG = 136, /* */ + FW_SCSI_RSP_ERR = 137, /* */ + FW_ERR_RDEV_IMPL_LOGO = 138, /* */ + FW_SCSI_UNDER_FLOW_ERR = 139, /* */ + FW_SCSI_OVER_FLOW_ERR = 140, /* */ + FW_SCSI_DDP_ERR = 141, /* DDP error*/ + FW_SCSI_TASK_ERR = 142, /* No SCSI tasks available */ +}; + #define FW_T4VF_SGE_BASE_ADDR 0x0000 #define FW_T4VF_MPS_BASE_ADDR 0x0100 #define FW_T4VF_PL_BASE_ADDR 0x0200 @@ -46,6 +85,7 @@ enum fw_wr_opcodes { FW_ULPTX_WR = 0x04, FW_TP_WR = 0x05, FW_ETH_TX_PKT_WR = 0x08, + FW_OFLD_CONNECTION_WR = 0x2f, FW_FLOWC_WR = 0x0a, FW_OFLD_TX_DATA_WR = 0x0b, FW_CMD_WR = 0x10, @@ -81,6 +121,282 @@ struct fw_wr_hdr { #define FW_WR_LEN16(x) ((x) << 0) #define HW_TPL_FR_MT_PR_IV_P_FC 0X32B +#define HW_TPL_FR_MT_PR_OV_P_FC 0X327 + +/* filter wr reply code in cookie in CPL_SET_TCB_RPL */ +enum fw_filter_wr_cookie { + FW_FILTER_WR_SUCCESS, + FW_FILTER_WR_FLT_ADDED, + FW_FILTER_WR_FLT_DELETED, + FW_FILTER_WR_SMT_TBL_FULL, + FW_FILTER_WR_EINVAL, +}; + +struct fw_filter_wr { + __be32 op_pkd; + __be32 len16_pkd; + __be64 r3; + __be32 tid_to_iq; + __be32 del_filter_to_l2tix; + __be16 ethtype; + __be16 ethtypem; + __u8 frag_to_ovlan_vldm; + __u8 smac_sel; + __be16 rx_chan_rx_rpl_iq; + __be32 maci_to_matchtypem; + __u8 ptcl; + __u8 ptclm; + __u8 ttyp; + __u8 ttypm; + __be16 ivlan; + __be16 ivlanm; + __be16 ovlan; + __be16 ovlanm; + __u8 lip[16]; + __u8 lipm[16]; + __u8 fip[16]; + __u8 fipm[16]; + __be16 lp; + __be16 lpm; + __be16 fp; + __be16 fpm; + __be16 r7; + __u8 sma[6]; +}; + +#define S_FW_FILTER_WR_TID 12 +#define M_FW_FILTER_WR_TID 0xfffff +#define V_FW_FILTER_WR_TID(x) ((x) << S_FW_FILTER_WR_TID) +#define G_FW_FILTER_WR_TID(x) \ + (((x) >> S_FW_FILTER_WR_TID) & M_FW_FILTER_WR_TID) + +#define S_FW_FILTER_WR_RQTYPE 11 +#define M_FW_FILTER_WR_RQTYPE 0x1 +#define V_FW_FILTER_WR_RQTYPE(x) ((x) << S_FW_FILTER_WR_RQTYPE) +#define G_FW_FILTER_WR_RQTYPE(x) \ + (((x) >> S_FW_FILTER_WR_RQTYPE) & M_FW_FILTER_WR_RQTYPE) +#define F_FW_FILTER_WR_RQTYPE V_FW_FILTER_WR_RQTYPE(1U) + +#define S_FW_FILTER_WR_NOREPLY 10 +#define M_FW_FILTER_WR_NOREPLY 0x1 +#define V_FW_FILTER_WR_NOREPLY(x) ((x) << S_FW_FILTER_WR_NOREPLY) +#define G_FW_FILTER_WR_NOREPLY(x) \ + (((x) >> S_FW_FILTER_WR_NOREPLY) & M_FW_FILTER_WR_NOREPLY) +#define F_FW_FILTER_WR_NOREPLY V_FW_FILTER_WR_NOREPLY(1U) + +#define S_FW_FILTER_WR_IQ 0 +#define M_FW_FILTER_WR_IQ 0x3ff +#define V_FW_FILTER_WR_IQ(x) ((x) << S_FW_FILTER_WR_IQ) +#define G_FW_FILTER_WR_IQ(x) \ + (((x) >> S_FW_FILTER_WR_IQ) & M_FW_FILTER_WR_IQ) + +#define S_FW_FILTER_WR_DEL_FILTER 31 +#define M_FW_FILTER_WR_DEL_FILTER 0x1 +#define V_FW_FILTER_WR_DEL_FILTER(x) ((x) << S_FW_FILTER_WR_DEL_FILTER) +#define G_FW_FILTER_WR_DEL_FILTER(x) \ + (((x) >> S_FW_FILTER_WR_DEL_FILTER) & M_FW_FILTER_WR_DEL_FILTER) +#define F_FW_FILTER_WR_DEL_FILTER V_FW_FILTER_WR_DEL_FILTER(1U) + +#define S_FW_FILTER_WR_RPTTID 25 +#define M_FW_FILTER_WR_RPTTID 0x1 +#define V_FW_FILTER_WR_RPTTID(x) ((x) << S_FW_FILTER_WR_RPTTID) +#define G_FW_FILTER_WR_RPTTID(x) \ + (((x) >> S_FW_FILTER_WR_RPTTID) & M_FW_FILTER_WR_RPTTID) +#define F_FW_FILTER_WR_RPTTID V_FW_FILTER_WR_RPTTID(1U) + +#define S_FW_FILTER_WR_DROP 24 +#define M_FW_FILTER_WR_DROP 0x1 +#define V_FW_FILTER_WR_DROP(x) ((x) << S_FW_FILTER_WR_DROP) +#define G_FW_FILTER_WR_DROP(x) \ + (((x) >> S_FW_FILTER_WR_DROP) & M_FW_FILTER_WR_DROP) +#define F_FW_FILTER_WR_DROP V_FW_FILTER_WR_DROP(1U) + +#define S_FW_FILTER_WR_DIRSTEER 23 +#define M_FW_FILTER_WR_DIRSTEER 0x1 
+#define V_FW_FILTER_WR_DIRSTEER(x) ((x) << S_FW_FILTER_WR_DIRSTEER) +#define G_FW_FILTER_WR_DIRSTEER(x) \ + (((x) >> S_FW_FILTER_WR_DIRSTEER) & M_FW_FILTER_WR_DIRSTEER) +#define F_FW_FILTER_WR_DIRSTEER V_FW_FILTER_WR_DIRSTEER(1U) + +#define S_FW_FILTER_WR_MASKHASH 22 +#define M_FW_FILTER_WR_MASKHASH 0x1 +#define V_FW_FILTER_WR_MASKHASH(x) ((x) << S_FW_FILTER_WR_MASKHASH) +#define G_FW_FILTER_WR_MASKHASH(x) \ + (((x) >> S_FW_FILTER_WR_MASKHASH) & M_FW_FILTER_WR_MASKHASH) +#define F_FW_FILTER_WR_MASKHASH V_FW_FILTER_WR_MASKHASH(1U) + +#define S_FW_FILTER_WR_DIRSTEERHASH 21 +#define M_FW_FILTER_WR_DIRSTEERHASH 0x1 +#define V_FW_FILTER_WR_DIRSTEERHASH(x) ((x) << S_FW_FILTER_WR_DIRSTEERHASH) +#define G_FW_FILTER_WR_DIRSTEERHASH(x) \ + (((x) >> S_FW_FILTER_WR_DIRSTEERHASH) & M_FW_FILTER_WR_DIRSTEERHASH) +#define F_FW_FILTER_WR_DIRSTEERHASH V_FW_FILTER_WR_DIRSTEERHASH(1U) + +#define S_FW_FILTER_WR_LPBK 20 +#define M_FW_FILTER_WR_LPBK 0x1 +#define V_FW_FILTER_WR_LPBK(x) ((x) << S_FW_FILTER_WR_LPBK) +#define G_FW_FILTER_WR_LPBK(x) \ + (((x) >> S_FW_FILTER_WR_LPBK) & M_FW_FILTER_WR_LPBK) +#define F_FW_FILTER_WR_LPBK V_FW_FILTER_WR_LPBK(1U) + +#define S_FW_FILTER_WR_DMAC 19 +#define M_FW_FILTER_WR_DMAC 0x1 +#define V_FW_FILTER_WR_DMAC(x) ((x) << S_FW_FILTER_WR_DMAC) +#define G_FW_FILTER_WR_DMAC(x) \ + (((x) >> S_FW_FILTER_WR_DMAC) & M_FW_FILTER_WR_DMAC) +#define F_FW_FILTER_WR_DMAC V_FW_FILTER_WR_DMAC(1U) + +#define S_FW_FILTER_WR_SMAC 18 +#define M_FW_FILTER_WR_SMAC 0x1 +#define V_FW_FILTER_WR_SMAC(x) ((x) << S_FW_FILTER_WR_SMAC) +#define G_FW_FILTER_WR_SMAC(x) \ + (((x) >> S_FW_FILTER_WR_SMAC) & M_FW_FILTER_WR_SMAC) +#define F_FW_FILTER_WR_SMAC V_FW_FILTER_WR_SMAC(1U) + +#define S_FW_FILTER_WR_INSVLAN 17 +#define M_FW_FILTER_WR_INSVLAN 0x1 +#define V_FW_FILTER_WR_INSVLAN(x) ((x) << S_FW_FILTER_WR_INSVLAN) +#define G_FW_FILTER_WR_INSVLAN(x) \ + (((x) >> S_FW_FILTER_WR_INSVLAN) & M_FW_FILTER_WR_INSVLAN) +#define F_FW_FILTER_WR_INSVLAN V_FW_FILTER_WR_INSVLAN(1U) + +#define S_FW_FILTER_WR_RMVLAN 16 +#define M_FW_FILTER_WR_RMVLAN 0x1 +#define V_FW_FILTER_WR_RMVLAN(x) ((x) << S_FW_FILTER_WR_RMVLAN) +#define G_FW_FILTER_WR_RMVLAN(x) \ + (((x) >> S_FW_FILTER_WR_RMVLAN) & M_FW_FILTER_WR_RMVLAN) +#define F_FW_FILTER_WR_RMVLAN V_FW_FILTER_WR_RMVLAN(1U) + +#define S_FW_FILTER_WR_HITCNTS 15 +#define M_FW_FILTER_WR_HITCNTS 0x1 +#define V_FW_FILTER_WR_HITCNTS(x) ((x) << S_FW_FILTER_WR_HITCNTS) +#define G_FW_FILTER_WR_HITCNTS(x) \ + (((x) >> S_FW_FILTER_WR_HITCNTS) & M_FW_FILTER_WR_HITCNTS) +#define F_FW_FILTER_WR_HITCNTS V_FW_FILTER_WR_HITCNTS(1U) + +#define S_FW_FILTER_WR_TXCHAN 13 +#define M_FW_FILTER_WR_TXCHAN 0x3 +#define V_FW_FILTER_WR_TXCHAN(x) ((x) << S_FW_FILTER_WR_TXCHAN) +#define G_FW_FILTER_WR_TXCHAN(x) \ + (((x) >> S_FW_FILTER_WR_TXCHAN) & M_FW_FILTER_WR_TXCHAN) + +#define S_FW_FILTER_WR_PRIO 12 +#define M_FW_FILTER_WR_PRIO 0x1 +#define V_FW_FILTER_WR_PRIO(x) ((x) << S_FW_FILTER_WR_PRIO) +#define G_FW_FILTER_WR_PRIO(x) \ + (((x) >> S_FW_FILTER_WR_PRIO) & M_FW_FILTER_WR_PRIO) +#define F_FW_FILTER_WR_PRIO V_FW_FILTER_WR_PRIO(1U) + +#define S_FW_FILTER_WR_L2TIX 0 +#define M_FW_FILTER_WR_L2TIX 0xfff +#define V_FW_FILTER_WR_L2TIX(x) ((x) << S_FW_FILTER_WR_L2TIX) +#define G_FW_FILTER_WR_L2TIX(x) \ + (((x) >> S_FW_FILTER_WR_L2TIX) & M_FW_FILTER_WR_L2TIX) + +#define S_FW_FILTER_WR_FRAG 7 +#define M_FW_FILTER_WR_FRAG 0x1 +#define V_FW_FILTER_WR_FRAG(x) ((x) << S_FW_FILTER_WR_FRAG) +#define G_FW_FILTER_WR_FRAG(x) \ + (((x) >> S_FW_FILTER_WR_FRAG) & M_FW_FILTER_WR_FRAG) +#define F_FW_FILTER_WR_FRAG 
V_FW_FILTER_WR_FRAG(1U) + +#define S_FW_FILTER_WR_FRAGM 6 +#define M_FW_FILTER_WR_FRAGM 0x1 +#define V_FW_FILTER_WR_FRAGM(x) ((x) << S_FW_FILTER_WR_FRAGM) +#define G_FW_FILTER_WR_FRAGM(x) \ + (((x) >> S_FW_FILTER_WR_FRAGM) & M_FW_FILTER_WR_FRAGM) +#define F_FW_FILTER_WR_FRAGM V_FW_FILTER_WR_FRAGM(1U) + +#define S_FW_FILTER_WR_IVLAN_VLD 5 +#define M_FW_FILTER_WR_IVLAN_VLD 0x1 +#define V_FW_FILTER_WR_IVLAN_VLD(x) ((x) << S_FW_FILTER_WR_IVLAN_VLD) +#define G_FW_FILTER_WR_IVLAN_VLD(x) \ + (((x) >> S_FW_FILTER_WR_IVLAN_VLD) & M_FW_FILTER_WR_IVLAN_VLD) +#define F_FW_FILTER_WR_IVLAN_VLD V_FW_FILTER_WR_IVLAN_VLD(1U) + +#define S_FW_FILTER_WR_OVLAN_VLD 4 +#define M_FW_FILTER_WR_OVLAN_VLD 0x1 +#define V_FW_FILTER_WR_OVLAN_VLD(x) ((x) << S_FW_FILTER_WR_OVLAN_VLD) +#define G_FW_FILTER_WR_OVLAN_VLD(x) \ + (((x) >> S_FW_FILTER_WR_OVLAN_VLD) & M_FW_FILTER_WR_OVLAN_VLD) +#define F_FW_FILTER_WR_OVLAN_VLD V_FW_FILTER_WR_OVLAN_VLD(1U) + +#define S_FW_FILTER_WR_IVLAN_VLDM 3 +#define M_FW_FILTER_WR_IVLAN_VLDM 0x1 +#define V_FW_FILTER_WR_IVLAN_VLDM(x) ((x) << S_FW_FILTER_WR_IVLAN_VLDM) +#define G_FW_FILTER_WR_IVLAN_VLDM(x) \ + (((x) >> S_FW_FILTER_WR_IVLAN_VLDM) & M_FW_FILTER_WR_IVLAN_VLDM) +#define F_FW_FILTER_WR_IVLAN_VLDM V_FW_FILTER_WR_IVLAN_VLDM(1U) + +#define S_FW_FILTER_WR_OVLAN_VLDM 2 +#define M_FW_FILTER_WR_OVLAN_VLDM 0x1 +#define V_FW_FILTER_WR_OVLAN_VLDM(x) ((x) << S_FW_FILTER_WR_OVLAN_VLDM) +#define G_FW_FILTER_WR_OVLAN_VLDM(x) \ + (((x) >> S_FW_FILTER_WR_OVLAN_VLDM) & M_FW_FILTER_WR_OVLAN_VLDM) +#define F_FW_FILTER_WR_OVLAN_VLDM V_FW_FILTER_WR_OVLAN_VLDM(1U) + +#define S_FW_FILTER_WR_RX_CHAN 15 +#define M_FW_FILTER_WR_RX_CHAN 0x1 +#define V_FW_FILTER_WR_RX_CHAN(x) ((x) << S_FW_FILTER_WR_RX_CHAN) +#define G_FW_FILTER_WR_RX_CHAN(x) \ + (((x) >> S_FW_FILTER_WR_RX_CHAN) & M_FW_FILTER_WR_RX_CHAN) +#define F_FW_FILTER_WR_RX_CHAN V_FW_FILTER_WR_RX_CHAN(1U) + +#define S_FW_FILTER_WR_RX_RPL_IQ 0 +#define M_FW_FILTER_WR_RX_RPL_IQ 0x3ff +#define V_FW_FILTER_WR_RX_RPL_IQ(x) ((x) << S_FW_FILTER_WR_RX_RPL_IQ) +#define G_FW_FILTER_WR_RX_RPL_IQ(x) \ + (((x) >> S_FW_FILTER_WR_RX_RPL_IQ) & M_FW_FILTER_WR_RX_RPL_IQ) + +#define S_FW_FILTER_WR_MACI 23 +#define M_FW_FILTER_WR_MACI 0x1ff +#define V_FW_FILTER_WR_MACI(x) ((x) << S_FW_FILTER_WR_MACI) +#define G_FW_FILTER_WR_MACI(x) \ + (((x) >> S_FW_FILTER_WR_MACI) & M_FW_FILTER_WR_MACI) + +#define S_FW_FILTER_WR_MACIM 14 +#define M_FW_FILTER_WR_MACIM 0x1ff +#define V_FW_FILTER_WR_MACIM(x) ((x) << S_FW_FILTER_WR_MACIM) +#define G_FW_FILTER_WR_MACIM(x) \ + (((x) >> S_FW_FILTER_WR_MACIM) & M_FW_FILTER_WR_MACIM) + +#define S_FW_FILTER_WR_FCOE 13 +#define M_FW_FILTER_WR_FCOE 0x1 +#define V_FW_FILTER_WR_FCOE(x) ((x) << S_FW_FILTER_WR_FCOE) +#define G_FW_FILTER_WR_FCOE(x) \ + (((x) >> S_FW_FILTER_WR_FCOE) & M_FW_FILTER_WR_FCOE) +#define F_FW_FILTER_WR_FCOE V_FW_FILTER_WR_FCOE(1U) + +#define S_FW_FILTER_WR_FCOEM 12 +#define M_FW_FILTER_WR_FCOEM 0x1 +#define V_FW_FILTER_WR_FCOEM(x) ((x) << S_FW_FILTER_WR_FCOEM) +#define G_FW_FILTER_WR_FCOEM(x) \ + (((x) >> S_FW_FILTER_WR_FCOEM) & M_FW_FILTER_WR_FCOEM) +#define F_FW_FILTER_WR_FCOEM V_FW_FILTER_WR_FCOEM(1U) + +#define S_FW_FILTER_WR_PORT 9 +#define M_FW_FILTER_WR_PORT 0x7 +#define V_FW_FILTER_WR_PORT(x) ((x) << S_FW_FILTER_WR_PORT) +#define G_FW_FILTER_WR_PORT(x) \ + (((x) >> S_FW_FILTER_WR_PORT) & M_FW_FILTER_WR_PORT) + +#define S_FW_FILTER_WR_PORTM 6 +#define M_FW_FILTER_WR_PORTM 0x7 +#define V_FW_FILTER_WR_PORTM(x) ((x) << S_FW_FILTER_WR_PORTM) +#define G_FW_FILTER_WR_PORTM(x) \ + (((x) >> S_FW_FILTER_WR_PORTM) & 
M_FW_FILTER_WR_PORTM) + +#define S_FW_FILTER_WR_MATCHTYPE 3 +#define M_FW_FILTER_WR_MATCHTYPE 0x7 +#define V_FW_FILTER_WR_MATCHTYPE(x) ((x) << S_FW_FILTER_WR_MATCHTYPE) +#define G_FW_FILTER_WR_MATCHTYPE(x) \ + (((x) >> S_FW_FILTER_WR_MATCHTYPE) & M_FW_FILTER_WR_MATCHTYPE) + +#define S_FW_FILTER_WR_MATCHTYPEM 0 +#define M_FW_FILTER_WR_MATCHTYPEM 0x7 +#define V_FW_FILTER_WR_MATCHTYPEM(x) ((x) << S_FW_FILTER_WR_MATCHTYPEM) +#define G_FW_FILTER_WR_MATCHTYPEM(x) \ + (((x) >> S_FW_FILTER_WR_MATCHTYPEM) & M_FW_FILTER_WR_MATCHTYPEM) struct fw_ulptx_wr { __be32 op_to_compl; @@ -100,6 +416,108 @@ struct fw_eth_tx_pkt_wr { __be64 r3; }; +struct fw_ofld_connection_wr { + __be32 op_compl; + __be32 len16_pkd; + __u64 cookie; + __be64 r2; + __be64 r3; + struct fw_ofld_connection_le { + __be32 version_cpl; + __be32 filter; + __be32 r1; + __be16 lport; + __be16 pport; + union fw_ofld_connection_leip { + struct fw_ofld_connection_le_ipv4 { + __be32 pip; + __be32 lip; + __be64 r0; + __be64 r1; + __be64 r2; + } ipv4; + struct fw_ofld_connection_le_ipv6 { + __be64 pip_hi; + __be64 pip_lo; + __be64 lip_hi; + __be64 lip_lo; + } ipv6; + } u; + } le; + struct fw_ofld_connection_tcb { + __be32 t_state_to_astid; + __be16 cplrxdataack_cplpassacceptrpl; + __be16 rcv_adv; + __be32 rcv_nxt; + __be32 tx_max; + __be64 opt0; + __be32 opt2; + __be32 r1; + __be64 r2; + __be64 r3; + } tcb; +}; + +#define S_FW_OFLD_CONNECTION_WR_VERSION 31 +#define M_FW_OFLD_CONNECTION_WR_VERSION 0x1 +#define V_FW_OFLD_CONNECTION_WR_VERSION(x) \ + ((x) << S_FW_OFLD_CONNECTION_WR_VERSION) +#define G_FW_OFLD_CONNECTION_WR_VERSION(x) \ + (((x) >> S_FW_OFLD_CONNECTION_WR_VERSION) & \ + M_FW_OFLD_CONNECTION_WR_VERSION) +#define F_FW_OFLD_CONNECTION_WR_VERSION \ + V_FW_OFLD_CONNECTION_WR_VERSION(1U) + +#define S_FW_OFLD_CONNECTION_WR_CPL 30 +#define M_FW_OFLD_CONNECTION_WR_CPL 0x1 +#define V_FW_OFLD_CONNECTION_WR_CPL(x) ((x) << S_FW_OFLD_CONNECTION_WR_CPL) +#define G_FW_OFLD_CONNECTION_WR_CPL(x) \ + (((x) >> S_FW_OFLD_CONNECTION_WR_CPL) & M_FW_OFLD_CONNECTION_WR_CPL) +#define F_FW_OFLD_CONNECTION_WR_CPL V_FW_OFLD_CONNECTION_WR_CPL(1U) + +#define S_FW_OFLD_CONNECTION_WR_T_STATE 28 +#define M_FW_OFLD_CONNECTION_WR_T_STATE 0xf +#define V_FW_OFLD_CONNECTION_WR_T_STATE(x) \ + ((x) << S_FW_OFLD_CONNECTION_WR_T_STATE) +#define G_FW_OFLD_CONNECTION_WR_T_STATE(x) \ + (((x) >> S_FW_OFLD_CONNECTION_WR_T_STATE) & \ + M_FW_OFLD_CONNECTION_WR_T_STATE) + +#define S_FW_OFLD_CONNECTION_WR_RCV_SCALE 24 +#define M_FW_OFLD_CONNECTION_WR_RCV_SCALE 0xf +#define V_FW_OFLD_CONNECTION_WR_RCV_SCALE(x) \ + ((x) << S_FW_OFLD_CONNECTION_WR_RCV_SCALE) +#define G_FW_OFLD_CONNECTION_WR_RCV_SCALE(x) \ + (((x) >> S_FW_OFLD_CONNECTION_WR_RCV_SCALE) & \ + M_FW_OFLD_CONNECTION_WR_RCV_SCALE) + +#define S_FW_OFLD_CONNECTION_WR_ASTID 0 +#define M_FW_OFLD_CONNECTION_WR_ASTID 0xffffff +#define V_FW_OFLD_CONNECTION_WR_ASTID(x) \ + ((x) << S_FW_OFLD_CONNECTION_WR_ASTID) +#define G_FW_OFLD_CONNECTION_WR_ASTID(x) \ + (((x) >> S_FW_OFLD_CONNECTION_WR_ASTID) & M_FW_OFLD_CONNECTION_WR_ASTID) + +#define S_FW_OFLD_CONNECTION_WR_CPLRXDATAACK 15 +#define M_FW_OFLD_CONNECTION_WR_CPLRXDATAACK 0x1 +#define V_FW_OFLD_CONNECTION_WR_CPLRXDATAACK(x) \ + ((x) << S_FW_OFLD_CONNECTION_WR_CPLRXDATAACK) +#define G_FW_OFLD_CONNECTION_WR_CPLRXDATAACK(x) \ + (((x) >> S_FW_OFLD_CONNECTION_WR_CPLRXDATAACK) & \ + M_FW_OFLD_CONNECTION_WR_CPLRXDATAACK) +#define F_FW_OFLD_CONNECTION_WR_CPLRXDATAACK \ + V_FW_OFLD_CONNECTION_WR_CPLRXDATAACK(1U) + +#define S_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL 14 +#define 
M_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL 0x1 +#define V_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL(x) \ + ((x) << S_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL) +#define G_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL(x) \ + (((x) >> S_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL) & \ + M_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL) +#define F_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL \ + V_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL(1U) + enum fw_flowc_mnem { FW_FLOWC_MNEM_PFNVFN, /* PFN [15:8] VFN [7:0] */ FW_FLOWC_MNEM_CH, diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index abf26c7c1d19..3bc1912afba9 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -616,7 +616,7 @@ static inline bool be_error(struct be_adapter *adapter) return adapter->eeh_error || adapter->hw_error || adapter->fw_timeout; } -static inline bool be_crit_error(struct be_adapter *adapter) +static inline bool be_hw_error(struct be_adapter *adapter) { return adapter->eeh_error || adapter->hw_error; } diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index f2875aa47661..8a250c38fb82 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -298,7 +298,12 @@ void be_async_mcc_enable(struct be_adapter *adapter) void be_async_mcc_disable(struct be_adapter *adapter) { + spin_lock_bh(&adapter->mcc_cq_lock); + adapter->mcc_obj.rearm_cq = false; + be_cq_notify(adapter, adapter->mcc_obj.cq.id, false, 0); + + spin_unlock_bh(&adapter->mcc_cq_lock); } int be_process_mcc(struct be_adapter *adapter) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index f95612b907ae..9dca22be8125 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -1689,15 +1689,41 @@ static void be_rx_cq_clean(struct be_rx_obj *rxo) struct be_queue_info *rxq = &rxo->q; struct be_queue_info *rx_cq = &rxo->cq; struct be_rx_compl_info *rxcp; + struct be_adapter *adapter = rxo->adapter; + int flush_wait = 0; u16 tail; - /* First cleanup pending rx completions */ - while ((rxcp = be_rx_compl_get(rxo)) != NULL) { - be_rx_compl_discard(rxo, rxcp); - be_cq_notify(rxo->adapter, rx_cq->id, false, 1); + /* Consume pending rx completions. + * Wait for the flush completion (identified by zero num_rcvd) + * to arrive. Notify CQ even when there are no more CQ entries + * for HW to flush partially coalesced CQ entries. + * In Lancer, there is no need to wait for flush compl. 
+ */ + for (;;) { + rxcp = be_rx_compl_get(rxo); + if (rxcp == NULL) { + if (lancer_chip(adapter)) + break; + + if (flush_wait++ > 10 || be_hw_error(adapter)) { + dev_warn(&adapter->pdev->dev, + "did not receive flush compl\n"); + break; + } + be_cq_notify(adapter, rx_cq->id, true, 0); + mdelay(1); + } else { + be_rx_compl_discard(rxo, rxcp); + be_cq_notify(adapter, rx_cq->id, true, 1); + if (rxcp->num_rcvd == 0) + break; + } } - /* Then free posted rx buffer that were not used */ + /* After cleanup, leave the CQ in unarmed state */ + be_cq_notify(adapter, rx_cq->id, false, 0); + + /* Then free posted rx buffers that were not used */ tail = (rxq->head + rxq->len - atomic_read(&rxq->used)) % rxq->len; for (; atomic_read(&rxq->used) > 0; index_inc(&tail, rxq->len)) { page_info = get_rx_page_info(rxo, tail); @@ -2157,7 +2183,7 @@ void be_detect_error(struct be_adapter *adapter) u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0; u32 i; - if (be_crit_error(adapter)) + if (be_hw_error(adapter)) return; if (lancer_chip(adapter)) { @@ -2398,13 +2424,22 @@ static int be_close(struct net_device *netdev) be_roce_dev_close(adapter); - be_async_mcc_disable(adapter); - if (!lancer_chip(adapter)) be_intr_set(adapter, false); - for_all_evt_queues(adapter, eqo, i) { + for_all_evt_queues(adapter, eqo, i) napi_disable(&eqo->napi); + + be_async_mcc_disable(adapter); + + /* Wait for all pending tx completions to arrive so that + * all tx skbs are freed. + */ + be_tx_compl_clean(adapter); + + be_rx_qs_destroy(adapter); + + for_all_evt_queues(adapter, eqo, i) { if (msix_enabled(adapter)) synchronize_irq(be_msix_vec_get(adapter, eqo)); else @@ -2414,12 +2449,6 @@ static int be_close(struct net_device *netdev) be_irq_unregister(adapter); - /* Wait for all pending tx completions to arrive so that - * all tx skbs are freed. - */ - be_tx_compl_clean(adapter); - - be_rx_qs_destroy(adapter); return 0; } diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig index 5ba6e1cbd346..ec490d741fc0 100644 --- a/drivers/net/ethernet/freescale/Kconfig +++ b/drivers/net/ethernet/freescale/Kconfig @@ -94,9 +94,8 @@ config GIANFAR config FEC_PTP bool "PTP Hardware Clock (PHC)" - depends on FEC && ARCH_MXC + depends on FEC && ARCH_MXC && !SOC_IMX25 && !SOC_IMX27 && !SOC_IMX35 && !SOC_IMX5 select PTP_1588_CLOCK - default y if SOC_IMX6Q ---help--- Say Y here if you want to use PTP Hardware Clock (PHC) in the driver. Only the basic clock operations have been implemented.
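The cxgb4 header additions above all follow the driver's register/field accessor convention: for a field FOO, S_FOO is its bit offset, M_FOO the unshifted mask, V_FOO(x) shifts a value into position, G_FOO(x) extracts it, and F_FOO is the shorthand for a one-bit flag. A minimal sketch of how the fw_filter_wr helpers compose; the two wrapper functions are illustrative only (not part of the patch), and real driver code would additionally convert the result with cpu_to_be32() before storing it in the big-endian struct field:

/* Build the tid_to_iq word of struct fw_filter_wr from its parts. */
static inline u32 example_build_tid_to_iq(u32 tid, u32 iq)
{
	return V_FW_FILTER_WR_TID(tid) |	/* 20-bit tid, bits 31:12 */
	       F_FW_FILTER_WR_RQTYPE |		/* one-bit flag, bit 11 */
	       V_FW_FILTER_WR_IQ(iq);		/* 10-bit iq, bits 9:0 */
}

/* Recover a field: G_*() shifts down, then applies the mask. */
static inline u32 example_get_tid(u32 tid_to_iq)
{
	return G_FW_FILTER_WR_TID(tid_to_iq);
}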
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 9a9de51ecc91..8b3d0512a46b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -1338,6 +1338,7 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, { struct mlx4_cmd_mailbox *mailbox; __be32 *outbox; + u32 dword_field; int err; u8 byte_field; @@ -1372,10 +1373,18 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, MLX4_GET(param->rdmarc_base, outbox, INIT_HCA_RDMARC_BASE_OFFSET); MLX4_GET(param->log_rd_per_qp, outbox, INIT_HCA_LOG_RD_OFFSET); + MLX4_GET(dword_field, outbox, INIT_HCA_FLAGS_OFFSET); + if (dword_field & (1 << INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN)) { + param->steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED; + } else { + MLX4_GET(byte_field, outbox, INIT_HCA_UC_STEERING_OFFSET); + if (byte_field & 0x8) + param->steering_mode = MLX4_STEERING_MODE_B0; + else + param->steering_mode = MLX4_STEERING_MODE_A0; + } /* steering attributes */ - if (dev->caps.steering_mode == - MLX4_STEERING_MODE_DEVICE_MANAGED) { - + if (param->steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { MLX4_GET(param->mc_base, outbox, INIT_HCA_FS_BASE_OFFSET); MLX4_GET(param->log_mc_entry_sz, outbox, INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET); diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index 2c2e7ade2a34..dbf2f69cc59f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -172,6 +172,7 @@ struct mlx4_init_hca_param { u8 log_uar_sz; u8 uar_page_sz; /* log pg sz in 4k chunks */ u8 fs_hash_enable_bits; + u8 steering_mode; /* for QUERY_HCA */ u64 dev_cap_enabled; }; diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index b2acbe7706a3..e1bafffbc3b1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -85,15 +85,15 @@ static int probe_vf; module_param(probe_vf, int, 0644); MODULE_PARM_DESC(probe_vf, "number of vfs to probe by pf driver (num_vfs > 0)"); -int mlx4_log_num_mgm_entry_size = 10; +int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE; module_param_named(log_num_mgm_entry_size, mlx4_log_num_mgm_entry_size, int, 0444); MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num" " of qp per mcg, for example:" - " 10 gives 248.range: 9<=" + " 10 gives 248.range: 7 <=" " log_num_mgm_entry_size <= 12." 
- " Not in use with device managed" - " flow steering"); + " To activate device managed" + " flow steering when available, set to -1"); static bool enable_64b_cqe_eqe; module_param(enable_64b_cqe_eqe, bool, 0444); @@ -281,28 +281,6 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.max_gso_sz = dev_cap->max_gso_sz; dev->caps.max_rss_tbl_sz = dev_cap->max_rss_tbl_sz; - if (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN) { - dev->caps.steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED; - dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry; - dev->caps.fs_log_max_ucast_qp_range_size = - dev_cap->fs_log_max_ucast_qp_range_size; - } else { - if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER && - dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) { - dev->caps.steering_mode = MLX4_STEERING_MODE_B0; - } else { - dev->caps.steering_mode = MLX4_STEERING_MODE_A0; - - if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER || - dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) - mlx4_warn(dev, "Must have UC_STEER and MC_STEER flags " - "set to use B0 steering. Falling back to A0 steering mode.\n"); - } - dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev); - } - mlx4_dbg(dev, "Steering mode is: %s\n", - mlx4_steering_mode_str(dev->caps.steering_mode)); - /* Sense port always allowed on supported devices for ConnectX-1 and -2 */ if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT) dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT; @@ -493,6 +471,23 @@ int mlx4_is_slave_active(struct mlx4_dev *dev, int slave) } EXPORT_SYMBOL(mlx4_is_slave_active); +static void slave_adjust_steering_mode(struct mlx4_dev *dev, + struct mlx4_dev_cap *dev_cap, + struct mlx4_init_hca_param *hca_param) +{ + dev->caps.steering_mode = hca_param->steering_mode; + if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { + dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry; + dev->caps.fs_log_max_ucast_qp_range_size = + dev_cap->fs_log_max_ucast_qp_range_size; + } else + dev->caps.num_qp_per_mgm = + 4 * ((1 << hca_param->log_mc_entry_sz)/16 - 2); + + mlx4_dbg(dev, "Steering mode is: %s\n", + mlx4_steering_mode_str(dev->caps.steering_mode)); +} + static int mlx4_slave_cap(struct mlx4_dev *dev) { int err; @@ -635,6 +630,8 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) dev->caps.cqe_size = 32; } + slave_adjust_steering_mode(dev, &dev_cap, &hca_param); + return 0; err_mem: @@ -1321,6 +1318,59 @@ static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev) } } +static int choose_log_fs_mgm_entry_size(int qp_per_entry) +{ + int i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; + + for (i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE; + i++) { + if (qp_per_entry <= 4 * ((1 << i) / 16 - 2)) + break; + } + + return (i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE) ? 
i : -1; +} + +static void choose_steering_mode(struct mlx4_dev *dev, + struct mlx4_dev_cap *dev_cap) +{ + if (mlx4_log_num_mgm_entry_size == -1 && + dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN && + (!mlx4_is_mfunc(dev) || + (dev_cap->fs_max_num_qp_per_entry >= (num_vfs + 1))) && + choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >= + MLX4_MIN_MGM_LOG_ENTRY_SIZE) { + dev->oper_log_mgm_entry_size = + choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry); + dev->caps.steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED; + dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry; + dev->caps.fs_log_max_ucast_qp_range_size = + dev_cap->fs_log_max_ucast_qp_range_size; + } else { + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER && + dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) + dev->caps.steering_mode = MLX4_STEERING_MODE_B0; + else { + dev->caps.steering_mode = MLX4_STEERING_MODE_A0; + + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER || + dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) + mlx4_warn(dev, "Must have both UC_STEER and MC_STEER flags " + "set to use B0 steering. Falling back to A0 steering mode.\n"); + } + dev->oper_log_mgm_entry_size = + mlx4_log_num_mgm_entry_size > 0 ? + mlx4_log_num_mgm_entry_size : + MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE; + dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev); + } + mlx4_dbg(dev, "Steering mode is: %s, oper_log_mgm_entry_size = %d, " + "modparam log_num_mgm_entry_size = %d\n", + mlx4_steering_mode_str(dev->caps.steering_mode), + dev->oper_log_mgm_entry_size, + mlx4_log_num_mgm_entry_size); +} + static int mlx4_init_hca(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -1360,6 +1410,8 @@ static int mlx4_init_hca(struct mlx4_dev *dev) goto err_stop_fw; } + choose_steering_mode(dev, &dev_cap); + if (mlx4_is_master(dev)) mlx4_parav_master_pf_caps(dev); @@ -2452,6 +2504,17 @@ static int __init mlx4_verify_params(void) port_type_array[0] = true; } + if (mlx4_log_num_mgm_entry_size != -1 && + (mlx4_log_num_mgm_entry_size < MLX4_MIN_MGM_LOG_ENTRY_SIZE || + mlx4_log_num_mgm_entry_size > MLX4_MAX_MGM_LOG_ENTRY_SIZE)) { + pr_warning("mlx4_core: mlx4_log_num_mgm_entry_size (%d) not " + "in legal range (-1 or %d..%d)\n", + mlx4_log_num_mgm_entry_size, + MLX4_MIN_MGM_LOG_ENTRY_SIZE, + MLX4_MAX_MGM_LOG_ENTRY_SIZE); + return -1; + } + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index e151c21baf2b..1ee4db3c6400 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -54,12 +54,7 @@ struct mlx4_mgm { int mlx4_get_mgm_entry_size(struct mlx4_dev *dev) { - if (dev->caps.steering_mode == - MLX4_STEERING_MODE_DEVICE_MANAGED) - return 1 << MLX4_FS_MGM_LOG_ENTRY_SIZE; - else - return min((1 << mlx4_log_num_mgm_entry_size), - MLX4_MAX_MGM_ENTRY_SIZE); + return 1 << dev->oper_log_mgm_entry_size; } int mlx4_get_qp_per_mgm(struct mlx4_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 1cf42036d7bb..116c5c29d2d1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -94,8 +94,10 @@ enum { }; enum { - MLX4_MAX_MGM_ENTRY_SIZE = 0x1000, - MLX4_MAX_QP_PER_MGM = 4 * (MLX4_MAX_MGM_ENTRY_SIZE / 16 - 2), + MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE = 10, + MLX4_MIN_MGM_LOG_ENTRY_SIZE = 7, + MLX4_MAX_MGM_LOG_ENTRY_SIZE = 12, + MLX4_MAX_QP_PER_MGM = 4 * ((1 << MLX4_MAX_MGM_LOG_ENTRY_SIZE) / 16 - 2), 
MLX4_MTT_ENTRY_PER_SEG = 8, }; diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index b05705f50f0f..561ed2a22a17 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -3071,6 +3071,7 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct list_head *rlist = &tracker->slave_list[slave].res_list[RES_MAC]; int err; + int qpn; struct mlx4_net_trans_rule_hw_ctrl *ctrl; struct _rule_hw *rule_header; int header_id; @@ -3080,13 +3081,21 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, return -EOPNOTSUPP; ctrl = (struct mlx4_net_trans_rule_hw_ctrl *)inbox->buf; + qpn = be32_to_cpu(ctrl->qpn) & 0xffffff; + err = get_res(dev, slave, qpn, RES_QP, NULL); + if (err) { + pr_err("Steering rule with qpn 0x%x rejected.\n", qpn); + return err; + } rule_header = (struct _rule_hw *)(ctrl + 1); header_id = map_hw_to_sw_id(be16_to_cpu(rule_header->id)); switch (header_id) { case MLX4_NET_TRANS_RULE_ID_ETH: - if (validate_eth_header_mac(slave, rule_header, rlist)) - return -EINVAL; + if (validate_eth_header_mac(slave, rule_header, rlist)) { + err = -EINVAL; + goto err_put; + } break; case MLX4_NET_TRANS_RULE_ID_IB: break; @@ -3094,14 +3103,17 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, case MLX4_NET_TRANS_RULE_ID_TCP: case MLX4_NET_TRANS_RULE_ID_UDP: pr_warn("Can't attach FS rule without L2 headers, adding L2 header.\n"); - if (add_eth_header(dev, slave, inbox, rlist, header_id)) - return -EINVAL; + if (add_eth_header(dev, slave, inbox, rlist, header_id)) { + err = -EINVAL; + goto err_put; + } vhcr->in_modifier += sizeof(struct mlx4_net_trans_rule_hw_eth) >> 2; break; default: pr_err("Corrupted mailbox.\n"); - return -EINVAL; + err = -EINVAL; + goto err_put; } err = mlx4_cmd_imm(dev, inbox->dma, &vhcr->out_param, @@ -3109,16 +3121,18 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) - return err; + goto err_put; err = add_res_range(dev, slave, vhcr->out_param, 1, RES_FS_RULE, 0); if (err) { mlx4_err(dev, "Fail to add flow steering resources.\n "); /* detach rule*/ mlx4_cmd(dev, vhcr->out_param, 0, 0, - MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, + MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); } +err_put: + put_res(dev, slave, qpn, RES_QP); return err; } diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c index 83f0ea929d3d..8ebc352bcbe6 100644 --- a/drivers/net/ethernet/micrel/ksz884x.c +++ b/drivers/net/ethernet/micrel/ksz884x.c @@ -4761,7 +4761,7 @@ static void transmit_cleanup(struct dev_info *hw_priv, int normal) struct ksz_dma_buf *dma_buf; struct net_device *dev = NULL; - spin_lock(&hw_priv->hwlock); + spin_lock_irq(&hw_priv->hwlock); last = info->last; while (info->avail < info->alloc) { @@ -4795,7 +4795,7 @@ static void transmit_cleanup(struct dev_info *hw_priv, int normal) info->avail++; } info->last = last; - spin_unlock(&hw_priv->hwlock); + spin_unlock_irq(&hw_priv->hwlock); /* Notify the network subsystem that the packet has been sent. 
*/ if (dev) @@ -5259,11 +5259,15 @@ static irqreturn_t netdev_intr(int irq, void *dev_id) struct dev_info *hw_priv = priv->adapter; struct ksz_hw *hw = &hw_priv->hw; + spin_lock(&hw_priv->hwlock); + hw_read_intr(hw, &int_enable); /* Not our interrupt! */ - if (!int_enable) + if (!int_enable) { + spin_unlock(&hw_priv->hwlock); return IRQ_NONE; + } do { hw_ack_intr(hw, int_enable); @@ -5310,6 +5314,8 @@ static irqreturn_t netdev_intr(int irq, void *dev_id) hw_ena_intr(hw); + spin_unlock(&hw_priv->hwlock); + return IRQ_HANDLED; } diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h index 537902479689..bc7ec64e9c7a 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h @@ -36,8 +36,8 @@ #define _QLCNIC_LINUX_MAJOR 5 #define _QLCNIC_LINUX_MINOR 0 -#define _QLCNIC_LINUX_SUBVERSION 29 -#define QLCNIC_LINUX_VERSIONID "5.0.29" +#define _QLCNIC_LINUX_SUBVERSION 30 +#define QLCNIC_LINUX_VERSIONID "5.0.30" #define QLCNIC_DRV_IDC_VER 0x01 #define QLCNIC_DRIVER_VERSION ((_QLCNIC_LINUX_MAJOR << 16) |\ (_QLCNIC_LINUX_MINOR << 8) | (_QLCNIC_LINUX_SUBVERSION)) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c index 58f094ca052e..b14b8f0787ea 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c @@ -134,7 +134,7 @@ int qlcnic_fw_cmd_get_minidump_temp(struct qlcnic_adapter *adapter) __le32 *tmp_buf; struct qlcnic_cmd_args cmd; struct qlcnic_hardware_context *ahw; - struct qlcnic_dump_template_hdr *tmpl_hdr, *tmp_tmpl; + struct qlcnic_dump_template_hdr *tmpl_hdr; dma_addr_t tmp_addr_t = 0; ahw = adapter->ahw; @@ -150,6 +150,8 @@ int qlcnic_fw_cmd_get_minidump_temp(struct qlcnic_adapter *adapter) } temp_size = cmd.rsp.arg2; version = cmd.rsp.arg3; + dev_info(&adapter->pdev->dev, + "minidump template version = 0x%x", version); if (!temp_size) return -EIO; @@ -174,7 +176,6 @@ int qlcnic_fw_cmd_get_minidump_temp(struct qlcnic_adapter *adapter) err = -EIO; goto error; } - tmp_tmpl = tmp_addr; ahw->fw_dump.tmpl_hdr = vzalloc(temp_size); if (!ahw->fw_dump.tmpl_hdr) { err = -EIO; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c index fc48e000f35f..7a6d5ebe4e0f 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c @@ -365,7 +365,7 @@ static int qlcnic_send_cmd_descs(struct qlcnic_adapter *adapter, struct cmd_desc_type0 *cmd_desc_arr, int nr_desc) { - u32 i, producer, consumer; + u32 i, producer; struct qlcnic_cmd_buffer *pbuf; struct cmd_desc_type0 *cmd_desc; struct qlcnic_host_tx_ring *tx_ring; @@ -379,7 +379,6 @@ qlcnic_send_cmd_descs(struct qlcnic_adapter *adapter, __netif_tx_lock_bh(tx_ring->txq); producer = tx_ring->producer; - consumer = tx_ring->sw_consumer; if (nr_desc >= qlcnic_tx_avail(tx_ring)) { netif_tx_stop_queue(tx_ring->txq); @@ -402,7 +401,7 @@ qlcnic_send_cmd_descs(struct qlcnic_adapter *adapter, pbuf->frag_count = 0; memcpy(&tx_ring->desc_head[producer], - &cmd_desc_arr[i], sizeof(struct cmd_desc_type0)); + cmd_desc, sizeof(struct cmd_desc_type0)); producer = get_next_index(producer, tx_ring->num_desc); i++; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index a7554d9aab0c..d833f5927891 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ 
b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -445,13 +445,10 @@ static int qlcnic_set_function_modes(struct qlcnic_adapter *adapter) { u8 id; - u32 ref_count; int i, ret = 1; u32 data = QLCNIC_MGMT_FUNC; struct qlcnic_hardware_context *ahw = adapter->ahw; - /* If other drivers are not in use set their privilege level */ - ref_count = QLCRD32(adapter, QLCNIC_CRB_DRV_ACTIVE); ret = qlcnic_api_lock(adapter); if (ret) goto err_lock; @@ -531,11 +528,9 @@ static int qlcnic_setup_pci_map(struct pci_dev *pdev, { u32 offset; void __iomem *mem_ptr0 = NULL; - resource_size_t mem_base; unsigned long mem_len, pci_len0 = 0, bar0_len; /* remap phys address */ - mem_base = pci_resource_start(pdev, 0); /* 0 is for BAR 0 */ mem_len = pci_resource_len(pdev, 0); qlcnic_get_bar_length(pdev->device, &bar0_len); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c index 12ff29270745..0b8d8625834c 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c @@ -197,7 +197,7 @@ static u32 qlcnic_dump_ctrl(struct qlcnic_adapter *adapter, int i, k, timeout = 0; void __iomem *base = adapter->ahw->pci_base0; u32 addr, data; - u8 opcode, no_ops; + u8 no_ops; struct __ctrl *ctr = &entry->region.ctrl; struct qlcnic_dump_template_hdr *t_hdr = adapter->ahw->fw_dump.tmpl_hdr; @@ -206,7 +206,6 @@ static u32 qlcnic_dump_ctrl(struct qlcnic_adapter *adapter, for (i = 0; i < no_ops; i++) { k = 0; - opcode = 0; for (k = 0; k < 8; k++) { if (!(ctr->opcode & (1 << k))) continue; diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index cb6fc5a743ca..5ac93323a40c 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -577,28 +577,30 @@ static irqreturn_t cp_interrupt (int irq, void *dev_instance) { struct net_device *dev = dev_instance; struct cp_private *cp; + int handled = 0; u16 status; if (unlikely(dev == NULL)) return IRQ_NONE; cp = netdev_priv(dev); + spin_lock(&cp->lock); + status = cpr16(IntrStatus); if (!status || (status == 0xFFFF)) - return IRQ_NONE; + goto out_unlock; + + handled = 1; netif_dbg(cp, intr, dev, "intr, status %04x cmd %02x cpcmd %04x\n", status, cpr8(Cmd), cpr16(CpCmd)); cpw16(IntrStatus, status & ~cp_rx_intr_mask); - spin_lock(&cp->lock); - /* close possible race's with dev_close */ if (unlikely(!netif_running(dev))) { cpw16(IntrMask, 0); - spin_unlock(&cp->lock); - return IRQ_HANDLED; + goto out_unlock; } if (status & (RxOK | RxErr | RxEmpty | RxFIFOOvr)) @@ -612,7 +614,6 @@ static irqreturn_t cp_interrupt (int irq, void *dev_instance) if (status & LinkChg) mii_check_media(&cp->mii_if, netif_msg_link(cp), false); - spin_unlock(&cp->lock); if (status & PciErr) { u16 pci_status; @@ -625,7 +626,10 @@ static irqreturn_t cp_interrupt (int irq, void *dev_instance) /* TODO: reset hardware */ } - return IRQ_HANDLED; +out_unlock: + spin_unlock(&cp->lock); + + return IRQ_RETVAL(handled); } #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index 022b45bc14ff..a670d23d9340 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@ -2386,8 +2386,6 @@ static const struct of_device_id smc91x_match[] = { {}, }; MODULE_DEVICE_TABLE(of, smc91x_match); -#else -#define smc91x_match NULL #endif static struct dev_pm_ops smc_drv_pm_ops = { @@ -2402,7 +2400,7 @@ static struct platform_driver smc_driver = { 
.name = CARDNAME, .owner = THIS_MODULE, .pm = &smc_drv_pm_ops, - .of_match_table = smc91x_match, + .of_match_table = of_match_ptr(smc91x_match), }, }; diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index 4616bf27d515..e112877d15d3 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -2575,11 +2575,13 @@ static const struct dev_pm_ops smsc911x_pm_ops = { #define SMSC911X_PM_OPS NULL #endif +#ifdef CONFIG_OF static const struct of_device_id smsc911x_dt_ids[] = { { .compatible = "smsc,lan9115", }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, smsc911x_dt_ids); +#endif static struct platform_driver smsc911x_driver = { .probe = smsc911x_drv_probe, @@ -2588,7 +2590,7 @@ static struct platform_driver smsc911x_driver = { .name = SMSC_CHIPNAME, .owner = THIS_MODULE, .pm = SMSC911X_PM_OPS, - .of_match_table = smsc911x_dt_ids, + .of_match_table = of_match_ptr(smsc911x_dt_ids), }, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 023a4fb4efa5..b05df8983be5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -127,14 +127,14 @@ static inline int stmmac_register_platform(void) } static inline void stmmac_unregister_platform(void) { - platform_driver_register(&stmmac_pltfr_driver); + platform_driver_unregister(&stmmac_pltfr_driver); } #else static inline int stmmac_register_platform(void) { pr_debug("stmmac: do not register the platf driver\n"); - return -EINVAL; + return 0; } static inline void stmmac_unregister_platform(void) { @@ -162,7 +162,7 @@ static inline int stmmac_register_pci(void) { pr_debug("stmmac: do not register the PCI driver\n"); - return -EINVAL; + return 0; } static inline void stmmac_unregister_pci(void) { diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 542edbcd92c7..f07c0612abf6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2194,18 +2194,20 @@ int stmmac_restore(struct net_device *ndev) */ static int __init stmmac_init(void) { - int err_plt = 0; - int err_pci = 0; - - err_plt = stmmac_register_platform(); - err_pci = stmmac_register_pci(); - - if ((err_pci) && (err_plt)) { - pr_err("stmmac: driver registration failed\n"); - return -EINVAL; - } + int ret; + ret = stmmac_register_platform(); + if (ret) + goto err; + ret = stmmac_register_pci(); + if (ret) + goto err_pci; return 0; +err_pci: + stmmac_unregister_platform(); +err: + pr_err("stmmac: driver registration failed\n"); + return ret; } static void __exit stmmac_exit(void) diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c index 337766738eca..5e62c1aeeffb 100644 --- a/drivers/net/ethernet/ti/cpts.c +++ b/drivers/net/ethernet/ti/cpts.c @@ -27,8 +27,6 @@ #include <linux/uaccess.h> #include <linux/workqueue.h> -#include <plat/clock.h> - #include "cpts.h" #ifdef CONFIG_TI_CPTS diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 40b426edc9e6..504f7f1cad94 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -138,6 +138,8 @@ struct tun_file { /* only used for fasync */ unsigned int flags; u16 queue_index; + struct list_head next; + struct tun_struct *detached; }; struct tun_flow_entry { @@ -182,6 +184,8 @@ struct tun_struct { struct hlist_head flows[TUN_NUM_FLOW_ENTRIES]; struct timer_list flow_gc_timer; unsigned long ageing_time; + unsigned int
numdisabled; + struct list_head disabled; }; static inline u32 tun_hashfn(u32 rxhash) @@ -385,6 +389,23 @@ static void tun_set_real_num_queues(struct tun_struct *tun) netif_set_real_num_rx_queues(tun->dev, tun->numqueues); } +static void tun_disable_queue(struct tun_struct *tun, struct tun_file *tfile) +{ + tfile->detached = tun; + list_add_tail(&tfile->next, &tun->disabled); + ++tun->numdisabled; +} + +static struct tun_struct *tun_enable_queue(struct tun_file *tfile) +{ + struct tun_struct *tun = tfile->detached; + + tfile->detached = NULL; + list_del_init(&tfile->next); + --tun->numdisabled; + return tun; +} + static void __tun_detach(struct tun_file *tfile, bool clean) { struct tun_file *ntfile; @@ -406,20 +427,25 @@ static void __tun_detach(struct tun_file *tfile, bool clean) ntfile->queue_index = index; --tun->numqueues; - sock_put(&tfile->sk); + if (clean) + sock_put(&tfile->sk); + else + tun_disable_queue(tun, tfile); synchronize_net(); tun_flow_delete_by_queue(tun, tun->numqueues + 1); /* Drop read queue */ skb_queue_purge(&tfile->sk.sk_receive_queue); tun_set_real_num_queues(tun); - - if (tun->numqueues == 0 && !(tun->flags & TUN_PERSIST)) - if (dev->reg_state == NETREG_REGISTERED) - unregister_netdevice(dev); - } + } else if (tfile->detached && clean) + tun = tun_enable_queue(tfile); if (clean) { + if (tun && tun->numqueues == 0 && tun->numdisabled == 0 && + !(tun->flags & TUN_PERSIST)) + if (tun->dev->reg_state == NETREG_REGISTERED) + unregister_netdevice(tun->dev); + BUG_ON(!test_bit(SOCK_EXTERNALLY_ALLOCATED, &tfile->socket.flags)); sk_release_kernel(&tfile->sk); @@ -436,7 +462,7 @@ static void tun_detach(struct tun_file *tfile, bool clean) static void tun_detach_all(struct net_device *dev) { struct tun_struct *tun = netdev_priv(dev); - struct tun_file *tfile; + struct tun_file *tfile, *tmp; int i, n = tun->numqueues; for (i = 0; i < n; i++) { @@ -457,6 +483,12 @@ static void tun_detach_all(struct net_device *dev) skb_queue_purge(&tfile->sk.sk_receive_queue); sock_put(&tfile->sk); } + list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) { + tun_enable_queue(tfile); + skb_queue_purge(&tfile->sk.sk_receive_queue); + sock_put(&tfile->sk); + } + BUG_ON(tun->numdisabled != 0); } static int tun_attach(struct tun_struct *tun, struct file *file) @@ -473,7 +505,8 @@ static int tun_attach(struct tun_struct *tun, struct file *file) goto out; err = -E2BIG; - if (tun->numqueues == MAX_TAP_QUEUES) + if (!tfile->detached && + tun->numqueues + tun->numdisabled == MAX_TAP_QUEUES) goto out; err = 0; @@ -487,9 +520,13 @@ static int tun_attach(struct tun_struct *tun, struct file *file) tfile->queue_index = tun->numqueues; rcu_assign_pointer(tfile->tun, tun); rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile); - sock_hold(&tfile->sk); tun->numqueues++; + if (tfile->detached) + tun_enable_queue(tfile); + else + sock_hold(&tfile->sk); + tun_set_real_num_queues(tun); /* device is allowed to go away first, so no need to hold extra @@ -1162,6 +1199,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY; } + skb_reset_network_header(skb); rxhash = skb_get_rxhash(skb); netif_rx_ni(skb); @@ -1349,6 +1387,7 @@ static void tun_free_netdev(struct net_device *dev) { struct tun_struct *tun = netdev_priv(dev); + BUG_ON(!(list_empty(&tun->disabled))); tun_flow_uninit(tun); free_netdev(dev); } @@ -1543,6 +1582,10 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) err = tun_attach(tun, file); if (err 
< 0) return err; + + if (tun->flags & TUN_TAP_MQ && + (tun->numqueues + tun->numdisabled > 1)) + return err; } else { char *name; @@ -1601,6 +1644,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) TUN_USER_FEATURES; dev->features = dev->hw_features; + INIT_LIST_HEAD(&tun->disabled); err = tun_attach(tun, file); if (err < 0) goto err_free_dev; @@ -1755,32 +1799,28 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr) { struct tun_file *tfile = file->private_data; struct tun_struct *tun; - struct net_device *dev; int ret = 0; rtnl_lock(); if (ifr->ifr_flags & IFF_ATTACH_QUEUE) { - dev = __dev_get_by_name(tfile->net, ifr->ifr_name); - if (!dev) { - ret = -EINVAL; - goto unlock; - } - - tun = netdev_priv(dev); - if (dev->netdev_ops != &tap_netdev_ops && - dev->netdev_ops != &tun_netdev_ops) + tun = tfile->detached; + if (!tun) ret = -EINVAL; else if (tun_not_capable(tun)) ret = -EPERM; else ret = tun_attach(tun, file); - } else if (ifr->ifr_flags & IFF_DETACH_QUEUE) - __tun_detach(tfile, false); - else + } else if (ifr->ifr_flags & IFF_DETACH_QUEUE) { + tun = rcu_dereference_protected(tfile->tun, + lockdep_rtnl_is_held()); + if (!tun || !(tun->flags & TUN_TAP_MQ)) + ret = -EINVAL; + else + __tun_detach(tfile, false); + } else ret = -EINVAL; -unlock: rtnl_unlock(); return ret; } @@ -2092,6 +2132,7 @@ static int tun_chr_open(struct inode *inode, struct file * file) file->private_data = tfile; set_bit(SOCK_EXTERNALLY_ALLOCATED, &tfile->socket.flags); + INIT_LIST_HEAD(&tfile->next); return 0; } diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index d0129827602b..3f3d12d766e7 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -457,12 +457,6 @@ int usbnet_cdc_bind(struct usbnet *dev, struct usb_interface *intf) } EXPORT_SYMBOL_GPL(usbnet_cdc_bind); -static int cdc_manage_power(struct usbnet *dev, int on) -{ - dev->intf->needs_remote_wakeup = on; - return 0; -} - static const struct driver_info cdc_info = { .description = "CDC Ethernet Device", .flags = FLAG_ETHER | FLAG_POINTTOPOINT, @@ -470,7 +464,7 @@ static const struct driver_info cdc_info = { .bind = usbnet_cdc_bind, .unbind = usbnet_cdc_unbind, .status = usbnet_cdc_status, - .manage_power = cdc_manage_power, + .manage_power = usbnet_manage_power, }; static const struct driver_info wwan_info = { @@ -479,7 +473,7 @@ static const struct driver_info wwan_info = { .bind = usbnet_cdc_bind, .unbind = usbnet_cdc_unbind, .status = usbnet_cdc_status, - .manage_power = cdc_manage_power, + .manage_power = usbnet_manage_power, }; /*-------------------------------------------------------------------------*/ @@ -487,6 +481,7 @@ static const struct driver_info wwan_info = { #define HUAWEI_VENDOR_ID 0x12D1 #define NOVATEL_VENDOR_ID 0x1410 #define ZTE_VENDOR_ID 0x19D2 +#define DELL_VENDOR_ID 0x413C static const struct usb_device_id products [] = { /* @@ -594,27 +589,29 @@ static const struct usb_device_id products [] = { /* Novatel USB551L and MC551 - handled by qmi_wwan */ { - .match_flags = USB_DEVICE_ID_MATCH_VENDOR - | USB_DEVICE_ID_MATCH_PRODUCT - | USB_DEVICE_ID_MATCH_INT_INFO, - .idVendor = NOVATEL_VENDOR_ID, - .idProduct = 0xB001, - .bInterfaceClass = USB_CLASS_COMM, - .bInterfaceSubClass = USB_CDC_SUBCLASS_ETHERNET, - .bInterfaceProtocol = USB_CDC_PROTO_NONE, + USB_DEVICE_AND_INTERFACE_INFO(NOVATEL_VENDOR_ID, 0xB001, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = 0, }, /* Novatel E362 - handled by qmi_wwan */ { - .match_flags 
= USB_DEVICE_ID_MATCH_VENDOR - | USB_DEVICE_ID_MATCH_PRODUCT - | USB_DEVICE_ID_MATCH_INT_INFO, - .idVendor = NOVATEL_VENDOR_ID, - .idProduct = 0x9010, - .bInterfaceClass = USB_CLASS_COMM, - .bInterfaceSubClass = USB_CDC_SUBCLASS_ETHERNET, - .bInterfaceProtocol = USB_CDC_PROTO_NONE, + USB_DEVICE_AND_INTERFACE_INFO(NOVATEL_VENDOR_ID, 0x9010, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = 0, +}, + +/* Dell Wireless 5800 (Novatel E362) - handled by qmi_wwan */ +{ + USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, 0x8195, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = 0, +}, + +/* Dell Wireless 5800 (Novatel E362) - handled by qmi_wwan */ +{ + USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, 0x8196, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = 0, }, diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index d38bc20a60e2..71b6e92b8e9b 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -1129,19 +1129,13 @@ static void cdc_ncm_disconnect(struct usb_interface *intf) usbnet_disconnect(intf); } -static int cdc_ncm_manage_power(struct usbnet *dev, int status) -{ - dev->intf->needs_remote_wakeup = status; - return 0; -} - static const struct driver_info cdc_ncm_info = { .description = "CDC NCM", .flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET, .bind = cdc_ncm_bind, .unbind = cdc_ncm_unbind, .check_connect = cdc_ncm_check_connect, - .manage_power = cdc_ncm_manage_power, + .manage_power = usbnet_manage_power, .status = cdc_ncm_status, .rx_fixup = cdc_ncm_rx_fixup, .tx_fixup = cdc_ncm_tx_fixup, @@ -1155,7 +1149,7 @@ static const struct driver_info wwan_info = { .bind = cdc_ncm_bind, .unbind = cdc_ncm_unbind, .check_connect = cdc_ncm_check_connect, - .manage_power = cdc_ncm_manage_power, + .manage_power = usbnet_manage_power, .status = cdc_ncm_status, .rx_fixup = cdc_ncm_rx_fixup, .tx_fixup = cdc_ncm_tx_fixup, diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 1ea91f4237f0..91d7cb9728eb 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -383,6 +383,20 @@ static const struct usb_device_id products[] = { USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&qmi_wwan_info, }, + { /* Dell Wireless 5800 (Novatel E362) */ + USB_DEVICE_AND_INTERFACE_INFO(0x413C, 0x8195, + USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, + USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&qmi_wwan_info, + }, + { /* Dell Wireless 5800 V2 (Novatel E362) */ + USB_DEVICE_AND_INTERFACE_INFO(0x413C, 0x8196, + USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, + USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&qmi_wwan_info, + }, /* 3. 
Combined interface devices matching on interface number */ {QMI_FIXED_INTF(0x12d1, 0x140c, 1)}, /* Huawei E173 */ @@ -419,6 +433,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x19d2, 0x0199, 1)}, /* ZTE MF820S */ {QMI_FIXED_INTF(0x19d2, 0x0200, 1)}, {QMI_FIXED_INTF(0x19d2, 0x0257, 3)}, /* ZTE MF821 */ + {QMI_FIXED_INTF(0x19d2, 0x0284, 4)}, /* ZTE MF880 */ {QMI_FIXED_INTF(0x19d2, 0x0326, 4)}, /* ZTE MF821D */ {QMI_FIXED_INTF(0x19d2, 0x1008, 4)}, /* ZTE (Vodafone) K3570-Z */ {QMI_FIXED_INTF(0x19d2, 0x1010, 4)}, /* ZTE (Vodafone) K3571-Z */ diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index c04110ba677f..3d4bf01641b4 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -719,7 +719,8 @@ int usbnet_stop (struct net_device *net) dev->flags = 0; del_timer_sync (&dev->delay); tasklet_kill (&dev->bh); - if (info->manage_power) + if (info->manage_power && + !test_and_clear_bit(EVENT_NO_RUNTIME_PM, &dev->flags)) info->manage_power(dev, 0); else usb_autopm_put_interface(dev->intf); @@ -794,14 +795,14 @@ int usbnet_open (struct net_device *net) tasklet_schedule (&dev->bh); if (info->manage_power) { retval = info->manage_power(dev, 1); - if (retval < 0) - goto done_manage_power_error; - usb_autopm_put_interface(dev->intf); + if (retval < 0) { + retval = 0; + set_bit(EVENT_NO_RUNTIME_PM, &dev->flags); + } else { + usb_autopm_put_interface(dev->intf); + } } return retval; - -done_manage_power_error: - clear_bit(EVENT_DEV_OPEN, &dev->flags); done: usb_autopm_put_interface(dev->intf); done_nopm: @@ -1615,6 +1616,16 @@ void usbnet_device_suggests_idle(struct usbnet *dev) } EXPORT_SYMBOL(usbnet_device_suggests_idle); +/* + * For devices that can do without special commands + */ +int usbnet_manage_power(struct usbnet *dev, int on) +{ + dev->intf->needs_remote_wakeup = on; + return 0; +} +EXPORT_SYMBOL(usbnet_manage_power); + /*-------------------------------------------------------------------------*/ static int __usbnet_read_cmd(struct usbnet *dev, u8 cmd, u8 reqtype, u16 value, u16 index, void *data, u16 size) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 68d64f0313ea..a6fcf15adc4f 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -130,7 +130,6 @@ struct skb_vnet_hdr { struct virtio_net_hdr hdr; struct virtio_net_hdr_mrg_rxbuf mhdr; }; - unsigned int num_sg; }; struct padded_vnet_hdr { @@ -530,10 +529,10 @@ static bool try_fill_recv(struct receive_queue *rq, gfp_t gfp) err = add_recvbuf_small(rq, gfp); oom = err == -ENOMEM; - if (err < 0) + if (err) break; ++rq->num; - } while (err > 0); + } while (rq->vq->num_free); if (unlikely(rq->num > rq->max)) rq->max = rq->num; virtqueue_kick(rq->vq); @@ -640,10 +639,10 @@ static int virtnet_open(struct net_device *dev) return 0; } -static unsigned int free_old_xmit_skbs(struct send_queue *sq) +static void free_old_xmit_skbs(struct send_queue *sq) { struct sk_buff *skb; - unsigned int len, tot_sgs = 0; + unsigned int len; struct virtnet_info *vi = sq->vq->vdev->priv; struct virtnet_stats *stats = this_cpu_ptr(vi->stats); @@ -655,10 +654,8 @@ static unsigned int free_old_xmit_skbs(struct send_queue *sq) stats->tx_packets++; u64_stats_update_end(&stats->tx_syncp); - tot_sgs += skb_vnet_hdr(skb)->num_sg; dev_kfree_skb_any(skb); } - return tot_sgs; } static int xmit_skb(struct send_queue *sq, struct sk_buff *skb) @@ -666,6 +663,7 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb) struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb); const unsigned char 
*dest = ((struct ethhdr *)skb->data)->h_dest; struct virtnet_info *vi = sq->vq->vdev->priv; + unsigned num_sg; pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest); @@ -704,8 +702,8 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb) else sg_set_buf(sq->sg, &hdr->hdr, sizeof hdr->hdr); - hdr->num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len) + 1; - return virtqueue_add_buf(sq->vq, sq->sg, hdr->num_sg, + num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len) + 1; + return virtqueue_add_buf(sq->vq, sq->sg, num_sg, 0, skb, GFP_ATOMIC); } @@ -714,28 +712,20 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) struct virtnet_info *vi = netdev_priv(dev); int qnum = skb_get_queue_mapping(skb); struct send_queue *sq = &vi->sq[qnum]; - int capacity; + int err; /* Free up any pending old buffers before queueing new ones. */ free_old_xmit_skbs(sq); /* Try to transmit */ - capacity = xmit_skb(sq, skb); - - /* This can happen with OOM and indirect buffers. */ - if (unlikely(capacity < 0)) { - if (likely(capacity == -ENOMEM)) { - if (net_ratelimit()) - dev_warn(&dev->dev, - "TXQ (%d) failure: out of memory\n", - qnum); - } else { - dev->stats.tx_fifo_errors++; - if (net_ratelimit()) - dev_warn(&dev->dev, - "Unexpected TXQ (%d) failure: %d\n", - qnum, capacity); - } + err = xmit_skb(sq, skb); + + /* This should not happen! */ + if (unlikely(err)) { + dev->stats.tx_fifo_errors++; + if (net_ratelimit()) + dev_warn(&dev->dev, + "Unexpected TXQ (%d) queue failure: %d\n", qnum, err); dev->stats.tx_dropped++; kfree_skb(skb); return NETDEV_TX_OK; @@ -748,12 +738,12 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) /* Apparently nice girls don't return TX_BUSY; stop the queue * before it gets out of hand. Naturally, this wastes entries. */ - if (capacity < 2+MAX_SKB_FRAGS) { + if (sq->vq->num_free < 2+MAX_SKB_FRAGS) { netif_stop_subqueue(dev, qnum); if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) { /* More just got used, free them then recheck. 
*/ - capacity += free_old_xmit_skbs(sq); - if (capacity >= 2+MAX_SKB_FRAGS) { + free_old_xmit_skbs(sq); + if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) { netif_start_subqueue(dev, qnum); virtqueue_disable_cb(sq->vq); } diff --git a/drivers/net/wimax/i2400m/i2400m-usb.h b/drivers/net/wimax/i2400m/i2400m-usb.h index 6650fde99e1d..9f1e947f3557 100644 --- a/drivers/net/wimax/i2400m/i2400m-usb.h +++ b/drivers/net/wimax/i2400m/i2400m-usb.h @@ -152,6 +152,9 @@ enum { /* Device IDs */ USB_DEVICE_ID_I6050 = 0x0186, USB_DEVICE_ID_I6050_2 = 0x0188, + USB_DEVICE_ID_I6150 = 0x07d6, + USB_DEVICE_ID_I6150_2 = 0x07d7, + USB_DEVICE_ID_I6150_3 = 0x07d9, USB_DEVICE_ID_I6250 = 0x0187, }; diff --git a/drivers/net/wimax/i2400m/usb.c b/drivers/net/wimax/i2400m/usb.c index 713d033891e6..080f36303a4f 100644 --- a/drivers/net/wimax/i2400m/usb.c +++ b/drivers/net/wimax/i2400m/usb.c @@ -510,6 +510,9 @@ int i2400mu_probe(struct usb_interface *iface, switch (id->idProduct) { case USB_DEVICE_ID_I6050: case USB_DEVICE_ID_I6050_2: + case USB_DEVICE_ID_I6150: + case USB_DEVICE_ID_I6150_2: + case USB_DEVICE_ID_I6150_3: case USB_DEVICE_ID_I6250: i2400mu->i6050 = 1; break; @@ -759,6 +762,9 @@ static struct usb_device_id i2400mu_id_table[] = { { USB_DEVICE(0x8086, USB_DEVICE_ID_I6050) }, { USB_DEVICE(0x8086, USB_DEVICE_ID_I6050_2) }, + { USB_DEVICE(0x8087, USB_DEVICE_ID_I6150) }, + { USB_DEVICE(0x8087, USB_DEVICE_ID_I6150_2) }, + { USB_DEVICE(0x8087, USB_DEVICE_ID_I6150_3) }, { USB_DEVICE(0x8086, USB_DEVICE_ID_I6250) }, { USB_DEVICE(0x8086, 0x0181) }, { USB_DEVICE(0x8086, 0x1403) }, diff --git a/drivers/net/wireless/Makefile b/drivers/net/wireless/Makefile index 062dfdff6364..67156efe14c4 100644 --- a/drivers/net/wireless/Makefile +++ b/drivers/net/wireless/Makefile @@ -47,7 +47,7 @@ obj-$(CONFIG_RT2X00) += rt2x00/ obj-$(CONFIG_P54_COMMON) += p54/ -obj-$(CONFIG_ATH_COMMON) += ath/ +obj-$(CONFIG_ATH_CARDS) += ath/ obj-$(CONFIG_MAC80211_HWSIM) += mac80211_hwsim.o diff --git a/drivers/net/wireless/rt2x00/rt2x00dev.c b/drivers/net/wireless/rt2x00/rt2x00dev.c index 4ffb6a584cd0..44f8b3f3cbed 100644 --- a/drivers/net/wireless/rt2x00/rt2x00dev.c +++ b/drivers/net/wireless/rt2x00/rt2x00dev.c @@ -685,6 +685,14 @@ void rt2x00lib_rxdone(struct queue_entry *entry, gfp_t gfp) * to mac80211. */ rx_status = IEEE80211_SKB_RXCB(entry->skb); + + /* Ensure that all fields of rx_status are initialized + * properly. The skb->cb array was used for driver + * specific information, so rx_status might contain + * garbage.
+ */ + memset(rx_status, 0, sizeof(*rx_status)); + rx_status->mactime = rxdesc.timestamp; rx_status->band = rt2x00dev->curr_band; rx_status->freq = rt2x00dev->curr_freq; diff --git a/drivers/of/base.c b/drivers/of/base.c index db8d211a0d05..2390ddb22d60 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -629,7 +629,7 @@ struct device_node *of_find_matching_node_and_match(struct device_node *from, read_unlock(&devtree_lock); return np; } -EXPORT_SYMBOL(of_find_matching_node); +EXPORT_SYMBOL(of_find_matching_node_and_match); /** * of_modalias_node - Lookup appropriate modalias for a device node diff --git a/drivers/pinctrl/pinctrl-exynos5440.c b/drivers/pinctrl/pinctrl-exynos5440.c index b8635f634e91..07db89528dc3 100644 --- a/drivers/pinctrl/pinctrl-exynos5440.c +++ b/drivers/pinctrl/pinctrl-exynos5440.c @@ -117,7 +117,7 @@ struct exynos5440_pinctrl_priv_data { }; /* list of all possible config options supported */ -struct pin_config { +static struct pin_config { char *prop_cfg; unsigned int cfg_type; } pcfgs[] = { diff --git a/drivers/pinctrl/pinctrl-samsung.c b/drivers/pinctrl/pinctrl-samsung.c index 8f31b656c4e9..864fed822f9d 100644 --- a/drivers/pinctrl/pinctrl-samsung.c +++ b/drivers/pinctrl/pinctrl-samsung.c @@ -37,7 +37,7 @@ #define FSUFFIX_LEN sizeof(FUNCTION_SUFFIX) /* list of all possible config options supported */ -struct pin_config { +static struct pin_config { char *prop_cfg; unsigned int cfg_type; } pcfgs[] = { diff --git a/drivers/pinctrl/pinctrl-samsung.h b/drivers/pinctrl/pinctrl-samsung.h index 5addfd16e3cc..e2d4e67f7e88 100644 --- a/drivers/pinctrl/pinctrl-samsung.h +++ b/drivers/pinctrl/pinctrl-samsung.h @@ -104,7 +104,7 @@ struct samsung_pinctrl_drv_data; /** * struct samsung_pin_bank: represent a controller pin-bank. - * @reg_offset: starting offset of the pin-bank registers. + * @pctl_offset: starting offset of the pin-bank registers. * @pin_base: starting pin number of the bank. * @nr_pins: number of pins included in this bank. * @func_width: width of the function selector bit field. diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c index 1859f71372e2..027096fe6a12 100644 --- a/drivers/rpmsg/virtio_rpmsg_bus.c +++ b/drivers/rpmsg/virtio_rpmsg_bus.c @@ -764,7 +764,7 @@ int rpmsg_send_offchannel_raw(struct rpmsg_channel *rpdev, u32 src, u32 dst, /* add message to the remote processor's virtqueue */ err = virtqueue_add_buf(vrp->svq, &sg, 1, 0, msg, GFP_KERNEL); - if (err < 0) { + if (err) { /* * need to reclaim the buffer here, otherwise it's lost * (memory won't leak, but rpmsg won't use it again for TX). 
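The rpmsg conversion above, like the virtio_net rework earlier in this section, tracks a single API change: virtqueue_add_buf() now returns 0 on success or a negative errno instead of the remaining ring capacity, so callers test if (err) rather than if (err < 0), and fixups such as the now-removed err = 0 assignment become dead code. A minimal caller sketch under the new contract — my_send() and its surrounding buffer management are illustrative, not taken from this series:

#include <linux/gfp.h>
#include <linux/scatterlist.h>
#include <linux/virtio.h>

/* Illustrative only: my_send() is a hypothetical caller, not from the patch. */
static int my_send(struct virtqueue *vq, void *msg, size_t len)
{
	struct scatterlist sg;
	int err;

	sg_init_one(&sg, msg, len);

	/* One host-readable ("out") entry, no host-writable ("in") entries. */
	err = virtqueue_add_buf(vq, &sg, 1, 0, msg, GFP_KERNEL);
	if (err)	/* 0 on success; e.g. -ENOSPC when the ring is full */
		return err;

	virtqueue_kick(vq);
	return 0;
}

Under the old contract the same call site had to remember that any non-negative return meant success, which is exactly what rpmsg's deleted err = 0 line compensated for.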
@@ -776,8 +776,6 @@ int rpmsg_send_offchannel_raw(struct rpmsg_channel *rpdev, u32 src, u32 dst, /* tell the remote processor it has a pending message to read */ virtqueue_kick(vrp->svq); - - err = 0; out: mutex_unlock(&vrp->tx_lock); return err; @@ -980,7 +978,7 @@ static int rpmsg_probe(struct virtio_device *vdev) err = virtqueue_add_buf(vrp->rvq, &sg, 0, 1, cpu_addr, GFP_KERNEL); - WARN_ON(err < 0); /* sanity check; this can't really happen */ + WARN_ON(err); /* sanity check; this can't really happen */ } /* suppress "tx-complete" interrupts */ diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c index f8a0aab218cb..5143629dedbd 100644 --- a/drivers/rtc/class.c +++ b/drivers/rtc/class.c @@ -244,7 +244,6 @@ void rtc_device_unregister(struct rtc_device *rtc) rtc_proc_del_device(rtc); device_unregister(&rtc->dev); rtc->ops = NULL; - ida_simple_remove(&rtc_ida, rtc->id); mutex_unlock(&rtc->ops_lock); put_device(&rtc->dev); } diff --git a/drivers/rtc/rtc-imxdi.c b/drivers/rtc/rtc-imxdi.c index 18a4f0dd78a3..8da7a5cf83c6 100644 --- a/drivers/rtc/rtc-imxdi.c +++ b/drivers/rtc/rtc-imxdi.c @@ -36,6 +36,7 @@ #include <linux/platform_device.h> #include <linux/rtc.h> #include <linux/sched.h> +#include <linux/spinlock.h> #include <linux/workqueue.h> #include <linux/of.h> diff --git a/drivers/scsi/csiostor/t4fw_api_stor.h b/drivers/scsi/csiostor/t4fw_api_stor.h index 1223e0d5fc07..097e52c0f8e1 100644 --- a/drivers/scsi/csiostor/t4fw_api_stor.h +++ b/drivers/scsi/csiostor/t4fw_api_stor.h @@ -40,45 +40,6 @@ * R E T U R N V A L U E S ********************************/ -enum fw_retval { - FW_SUCCESS = 0, /* completed sucessfully */ - FW_EPERM = 1, /* operation not permitted */ - FW_ENOENT = 2, /* no such file or directory */ - FW_EIO = 5, /* input/output error; hw bad */ - FW_ENOEXEC = 8, /* exec format error; inv microcode */ - FW_EAGAIN = 11, /* try again */ - FW_ENOMEM = 12, /* out of memory */ - FW_EFAULT = 14, /* bad address; fw bad */ - FW_EBUSY = 16, /* resource busy */ - FW_EEXIST = 17, /* file exists */ - FW_EINVAL = 22, /* invalid argument */ - FW_ENOSPC = 28, /* no space left on device */ - FW_ENOSYS = 38, /* functionality not implemented */ - FW_EPROTO = 71, /* protocol error */ - FW_EADDRINUSE = 98, /* address already in use */ - FW_EADDRNOTAVAIL = 99, /* cannot assigned requested address */ - FW_ENETDOWN = 100, /* network is down */ - FW_ENETUNREACH = 101, /* network is unreachable */ - FW_ENOBUFS = 105, /* no buffer space available */ - FW_ETIMEDOUT = 110, /* timeout */ - FW_EINPROGRESS = 115, /* fw internal */ - FW_SCSI_ABORT_REQUESTED = 128, /* */ - FW_SCSI_ABORT_TIMEDOUT = 129, /* */ - FW_SCSI_ABORTED = 130, /* */ - FW_SCSI_CLOSE_REQUESTED = 131, /* */ - FW_ERR_LINK_DOWN = 132, /* */ - FW_RDEV_NOT_READY = 133, /* */ - FW_ERR_RDEV_LOST = 134, /* */ - FW_ERR_RDEV_LOGO = 135, /* */ - FW_FCOE_NO_XCHG = 136, /* */ - FW_SCSI_RSP_ERR = 137, /* */ - FW_ERR_RDEV_IMPL_LOGO = 138, /* */ - FW_SCSI_UNDER_FLOW_ERR = 139, /* */ - FW_SCSI_OVER_FLOW_ERR = 140, /* */ - FW_SCSI_DDP_ERR = 141, /* DDP error*/ - FW_SCSI_TASK_ERR = 142, /* No SCSI tasks available */ -}; - enum fw_fcoe_link_sub_op { FCOE_LINK_DOWN = 0x0, FCOE_LINK_UP = 0x1, diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index dd8dc27fa32c..74ab67a169ec 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -215,7 +215,7 @@ static void virtscsi_ctrl_done(struct virtqueue *vq) static int virtscsi_kick_event(struct virtio_scsi *vscsi, struct virtio_scsi_event_node *event_node) { - int ret; + int err; 
struct scatterlist sg; unsigned long flags; @@ -223,13 +223,14 @@ static int virtscsi_kick_event(struct virtio_scsi *vscsi, spin_lock_irqsave(&vscsi->event_vq.vq_lock, flags); - ret = virtqueue_add_buf(vscsi->event_vq.vq, &sg, 0, 1, event_node, GFP_ATOMIC); - if (ret >= 0) + err = virtqueue_add_buf(vscsi->event_vq.vq, &sg, 0, 1, event_node, + GFP_ATOMIC); + if (!err) virtqueue_kick(vscsi->event_vq.vq); spin_unlock_irqrestore(&vscsi->event_vq.vq_lock, flags); - return ret; + return err; } static int virtscsi_kick_event_all(struct virtio_scsi *vscsi) @@ -410,22 +411,23 @@ static int virtscsi_kick_cmd(struct virtio_scsi_target_state *tgt, { unsigned int out_num, in_num; unsigned long flags; - int ret; + int err; + bool needs_kick = false; spin_lock_irqsave(&tgt->tgt_lock, flags); virtscsi_map_cmd(tgt, cmd, &out_num, &in_num, req_size, resp_size); spin_lock(&vq->vq_lock); - ret = virtqueue_add_buf(vq->vq, tgt->sg, out_num, in_num, cmd, gfp); + err = virtqueue_add_buf(vq->vq, tgt->sg, out_num, in_num, cmd, gfp); spin_unlock(&tgt->tgt_lock); - if (ret >= 0) - ret = virtqueue_kick_prepare(vq->vq); + if (!err) + needs_kick = virtqueue_kick_prepare(vq->vq); spin_unlock_irqrestore(&vq->vq_lock, flags); - if (ret > 0) + if (needs_kick) virtqueue_notify(vq->vq); - return ret; + return err; } static int virtscsi_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *sc) @@ -467,7 +469,7 @@ static int virtscsi_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *sc) if (virtscsi_kick_cmd(tgt, &vscsi->req_vq, cmd, sizeof cmd->req.cmd, sizeof cmd->resp.cmd, - GFP_ATOMIC) >= 0) + GFP_ATOMIC) == 0) ret = 0; else mempool_free(cmd, virtscsi_cmd_pool); diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c index 75c0c4f5fdf2..ab34497bcfee 100644 --- a/drivers/spi/spi-atmel.c +++ b/drivers/spi/spi-atmel.c @@ -20,6 +20,7 @@ #include <linux/spi/spi.h> #include <linux/slab.h> #include <linux/platform_data/atmel.h> +#include <linux/of.h> #include <asm/io.h> #include <asm/gpio.h> @@ -768,6 +769,10 @@ static int atmel_spi_setup(struct spi_device *spi) /* chipselect must have been muxed as GPIO (e.g. in board setup) */ npcs_pin = (unsigned int)spi->controller_data; + + if (gpio_is_valid(spi->cs_gpio)) + npcs_pin = spi->cs_gpio; + asd = spi->controller_state; if (!asd) { asd = kzalloc(sizeof(struct atmel_spi_device), GFP_KERNEL); @@ -937,8 +942,9 @@ static int atmel_spi_probe(struct platform_device *pdev) /* the spi->mode bits understood by this driver: */ master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH; + master->dev.of_node = pdev->dev.of_node; master->bus_num = pdev->id; - master->num_chipselect = 4; + master->num_chipselect = master->dev.of_node ? 
0 : 4; master->setup = atmel_spi_setup; master->transfer = atmel_spi_transfer; master->cleanup = atmel_spi_cleanup; @@ -1064,11 +1070,20 @@ static int atmel_spi_resume(struct platform_device *pdev) #define atmel_spi_resume NULL #endif +#if defined(CONFIG_OF) +static const struct of_device_id atmel_spi_dt_ids[] = { + { .compatible = "atmel,at91rm9200-spi" }, + { /* sentinel */ } +}; + +MODULE_DEVICE_TABLE(of, atmel_spi_dt_ids); +#endif static struct platform_driver atmel_spi_driver = { .driver = { .name = "atmel_spi", .owner = THIS_MODULE, + .of_match_table = of_match_ptr(atmel_spi_dt_ids), }, .suspend = atmel_spi_suspend, .resume = atmel_spi_resume, diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c index 4dd7b7ce5c5a..ad93231a8038 100644 --- a/drivers/spi/spi-s3c64xx.c +++ b/drivers/spi/spi-s3c64xx.c @@ -215,6 +215,10 @@ static void flush_fifo(struct s3c64xx_spi_driver_data *sdd) writel(0, regs + S3C64XX_SPI_PACKET_CNT); val = readl(regs + S3C64XX_SPI_CH_CFG); + val &= ~(S3C64XX_SPI_CH_RXCH_ON | S3C64XX_SPI_CH_TXCH_ON); + writel(val, regs + S3C64XX_SPI_CH_CFG); + + val = readl(regs + S3C64XX_SPI_CH_CFG); val |= S3C64XX_SPI_CH_SW_RST; val &= ~S3C64XX_SPI_CH_HS_EN; writel(val, regs + S3C64XX_SPI_CH_CFG); @@ -248,10 +252,6 @@ static void flush_fifo(struct s3c64xx_spi_driver_data *sdd) val = readl(regs + S3C64XX_SPI_MODE_CFG); val &= ~(S3C64XX_SPI_MODE_TXDMA_ON | S3C64XX_SPI_MODE_RXDMA_ON); writel(val, regs + S3C64XX_SPI_MODE_CFG); - - val = readl(regs + S3C64XX_SPI_CH_CFG); - val &= ~(S3C64XX_SPI_CH_RXCH_ON | S3C64XX_SPI_CH_TXCH_ON); - writel(val, regs + S3C64XX_SPI_CH_CFG); } static void s3c64xx_spi_dmacb(void *data) @@ -771,8 +771,6 @@ static int s3c64xx_spi_transfer_one_message(struct spi_master *master, if (list_is_last(&xfer->transfer_list, &msg->transfers)) cs_toggle = 1; - else - disable_cs(sdd, spi); } msg->actual_length += xfer->len; diff --git a/drivers/spi/spi-sh-hspi.c b/drivers/spi/spi-sh-hspi.c index 32f7b55fce09..60cfae51c713 100644 --- a/drivers/spi/spi-sh-hspi.c +++ b/drivers/spi/spi-sh-hspi.c @@ -290,7 +290,7 @@ static int hspi_probe(struct platform_device *pdev) } clk = clk_get(NULL, "shyway_clk"); - if (!clk) { + if (IS_ERR(clk)) { dev_err(&pdev->dev, "shyway_clk is required\n"); ret = -EINVAL; goto error0; diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index ab095acdb2a8..19ee901577da 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -824,6 +824,7 @@ static void of_register_spi_devices(struct spi_master *master) struct spi_device *spi; struct device_node *nc; const __be32 *prop; + char modalias[SPI_NAME_SIZE + 4]; int rc; int len; @@ -887,7 +888,9 @@ static void of_register_spi_devices(struct spi_master *master) spi->dev.of_node = nc; /* Register the new device */ - request_module(spi->modalias); + snprintf(modalias, sizeof(modalias), "%s%s", SPI_MODULE_PREFIX, + spi->modalias); + request_module(modalias); rc = spi_add_device(spi); if (rc) { dev_err(&master->dev, "spi_device register error %s\n", diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c index 23f797eb7a28..57d6b29c039c 100644 --- a/drivers/tty/serial/omap-serial.c +++ b/drivers/tty/serial/omap-serial.c @@ -41,8 +41,7 @@ #include <linux/of.h> #include <linux/gpio.h> #include <linux/pinctrl/consumer.h> - -#include <plat/omap-serial.h> +#include <linux/platform_data/serial-omap.h> #define OMAP_MAX_HSUART_PORTS 6 diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 6c119944bbb6..b28e66c4376a 100644 --- a/drivers/vfio/pci/vfio_pci.c 
+++ b/drivers/vfio/pci/vfio_pci.c @@ -43,6 +43,10 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev) u16 cmd; u8 msix_pos; + ret = pci_enable_device(pdev); + if (ret) + return ret; + vdev->reset_works = (pci_reset_function(pdev) == 0); pci_save_state(pdev); vdev->pci_saved_state = pci_store_saved_state(pdev); @@ -51,8 +55,11 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev) __func__, dev_name(&pdev->dev)); ret = vfio_config_init(vdev); - if (ret) - goto out; + if (ret) { + pci_load_and_free_saved_state(pdev, &vdev->pci_saved_state); + pci_disable_device(pdev); + return ret; + } if (likely(!nointxmask)) vdev->pci_2_3 = pci_intx_mask_supported(pdev); @@ -77,24 +84,15 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev) } else vdev->msix_bar = 0xFF; - ret = pci_enable_device(pdev); - if (ret) - goto out; - - return ret; - -out: - kfree(vdev->pci_saved_state); - vdev->pci_saved_state = NULL; - vfio_config_free(vdev); - return ret; + return 0; } static void vfio_pci_disable(struct vfio_pci_device *vdev) { + struct pci_dev *pdev = vdev->pdev; int bar; - pci_disable_device(vdev->pdev); + pci_disable_device(pdev); vfio_pci_set_irqs_ioctl(vdev, VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER, @@ -104,22 +102,40 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev) vfio_config_free(vdev); - pci_reset_function(vdev->pdev); - - if (pci_load_and_free_saved_state(vdev->pdev, - &vdev->pci_saved_state) == 0) - pci_restore_state(vdev->pdev); - else - pr_info("%s: Couldn't reload %s saved state\n", - __func__, dev_name(&vdev->pdev->dev)); - for (bar = PCI_STD_RESOURCES; bar <= PCI_STD_RESOURCE_END; bar++) { if (!vdev->barmap[bar]) continue; - pci_iounmap(vdev->pdev, vdev->barmap[bar]); - pci_release_selected_regions(vdev->pdev, 1 << bar); + pci_iounmap(pdev, vdev->barmap[bar]); + pci_release_selected_regions(pdev, 1 << bar); vdev->barmap[bar] = NULL; } + + /* + * If we have saved state, restore it. If we can reset the device, + * even better. Resetting with current state seems better than + * nothing, but saving and restoring current state without reset + * is just busy work. + */ + if (pci_load_and_free_saved_state(pdev, &vdev->pci_saved_state)) { + pr_info("%s: Couldn't reload %s saved state\n", + __func__, dev_name(&pdev->dev)); + + if (!vdev->reset_works) + return; + + pci_save_state(pdev); + } + + /* + * Disable INTx and MSI, presumably to avoid spurious interrupts + * during reset. 
Stolen from pci_reset_function() + */ + pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE); + + if (vdev->reset_works) + __pci_reset_function(pdev); + + pci_restore_state(pdev); } static void vfio_pci_release(void *device_data) @@ -327,15 +343,10 @@ static long vfio_pci_ioctl(void *device_data, hdr.count > vfio_pci_get_irq_count(vdev, hdr.index)) return -EINVAL; - data = kmalloc(hdr.count * size, GFP_KERNEL); - if (!data) - return -ENOMEM; - - if (copy_from_user(data, (void __user *)(arg + minsz), - hdr.count * size)) { - kfree(data); - return -EFAULT; - } + data = memdup_user((void __user *)(arg + minsz), + hdr.count * size); + if (IS_ERR(data)) + return PTR_ERR(data); } mutex_lock(&vdev->igate); @@ -562,9 +573,9 @@ static int __init vfio_pci_init(void) return 0; -out_virqfd: - vfio_pci_virqfd_exit(); out_driver: + vfio_pci_virqfd_exit(); +out_virqfd: vfio_pci_uninit_perm_bits(); return ret; } diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 56097c6d072d..12c264d3b058 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -191,6 +191,17 @@ static void vfio_container_put(struct vfio_container *container) kref_put(&container->kref, vfio_container_release); } +static void vfio_group_unlock_and_free(struct vfio_group *group) +{ + mutex_unlock(&vfio.group_lock); + /* + * Unregister outside of lock. A spurious callback is harmless now + * that the group is no longer in vfio.group_list. + */ + iommu_group_unregister_notifier(group->iommu_group, &group->nb); + kfree(group); +} + /** * Group objects - create, release, get, put, search */ @@ -229,8 +240,7 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group) minor = vfio_alloc_group_minor(group); if (minor < 0) { - mutex_unlock(&vfio.group_lock); - kfree(group); + vfio_group_unlock_and_free(group); return ERR_PTR(minor); } @@ -239,8 +249,7 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group) if (tmp->iommu_group == iommu_group) { vfio_group_get(tmp); vfio_free_group_minor(minor); - mutex_unlock(&vfio.group_lock); - kfree(group); + vfio_group_unlock_and_free(group); return tmp; } } @@ -249,8 +258,7 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group) group, "%d", iommu_group_id(iommu_group)); if (IS_ERR(dev)) { vfio_free_group_minor(minor); - mutex_unlock(&vfio.group_lock); - kfree(group); + vfio_group_unlock_and_free(group); return (struct vfio_group *)dev; /* ERR_PTR */ } @@ -274,16 +282,7 @@ static void vfio_group_release(struct kref *kref) device_destroy(vfio.class, MKDEV(MAJOR(vfio.devt), group->minor)); list_del(&group->vfio_next); vfio_free_group_minor(group->minor); - - mutex_unlock(&vfio.group_lock); - - /* - * Unregister outside of lock. A spurious callback is harmless now - * that the group is no longer in vfio.group_list. 
- */ - iommu_group_unregister_notifier(group->iommu_group, &group->nb); - - kfree(group); + vfio_group_unlock_and_free(group); } static void vfio_group_put(struct vfio_group *group) @@ -466,8 +465,9 @@ static int vfio_dev_viable(struct device *dev, void *data) { struct vfio_group *group = data; struct vfio_device *device; + struct device_driver *drv = ACCESS_ONCE(dev->driver); - if (!dev->driver || vfio_whitelisted_driver(dev->driver)) + if (!drv || vfio_whitelisted_driver(drv)) return 0; device = vfio_group_get_device(group, dev); diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 809b0de59c09..ee59b74768d9 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -10,33 +10,32 @@ static DEFINE_IDA(virtio_index_ida); static ssize_t device_show(struct device *_d, struct device_attribute *attr, char *buf) { - struct virtio_device *dev = container_of(_d,struct virtio_device,dev); + struct virtio_device *dev = dev_to_virtio(_d); return sprintf(buf, "0x%04x\n", dev->id.device); } static ssize_t vendor_show(struct device *_d, struct device_attribute *attr, char *buf) { - struct virtio_device *dev = container_of(_d,struct virtio_device,dev); + struct virtio_device *dev = dev_to_virtio(_d); return sprintf(buf, "0x%04x\n", dev->id.vendor); } static ssize_t status_show(struct device *_d, struct device_attribute *attr, char *buf) { - struct virtio_device *dev = container_of(_d,struct virtio_device,dev); + struct virtio_device *dev = dev_to_virtio(_d); return sprintf(buf, "0x%08x\n", dev->config->get_status(dev)); } static ssize_t modalias_show(struct device *_d, struct device_attribute *attr, char *buf) { - struct virtio_device *dev = container_of(_d,struct virtio_device,dev); - + struct virtio_device *dev = dev_to_virtio(_d); return sprintf(buf, "virtio:d%08Xv%08X\n", dev->id.device, dev->id.vendor); } static ssize_t features_show(struct device *_d, struct device_attribute *attr, char *buf) { - struct virtio_device *dev = container_of(_d, struct virtio_device, dev); + struct virtio_device *dev = dev_to_virtio(_d); unsigned int i; ssize_t len = 0; @@ -71,10 +70,10 @@ static inline int virtio_id_match(const struct virtio_device *dev, static int virtio_dev_match(struct device *_dv, struct device_driver *_dr) { unsigned int i; - struct virtio_device *dev = container_of(_dv,struct virtio_device,dev); + struct virtio_device *dev = dev_to_virtio(_dv); const struct virtio_device_id *ids; - ids = container_of(_dr, struct virtio_driver, driver)->id_table; + ids = drv_to_virtio(_dr)->id_table; for (i = 0; ids[i].device; i++) if (virtio_id_match(dev, &ids[i])) return 1; @@ -83,7 +82,7 @@ static int virtio_dev_match(struct device *_dv, struct device_driver *_dr) static int virtio_uevent(struct device *_dv, struct kobj_uevent_env *env) { - struct virtio_device *dev = container_of(_dv,struct virtio_device,dev); + struct virtio_device *dev = dev_to_virtio(_dv); return add_uevent_var(env, "MODALIAS=virtio:d%08Xv%08X", dev->id.device, dev->id.vendor); @@ -98,8 +97,7 @@ void virtio_check_driver_offered_feature(const struct virtio_device *vdev, unsigned int fbit) { unsigned int i; - struct virtio_driver *drv = container_of(vdev->dev.driver, - struct virtio_driver, driver); + struct virtio_driver *drv = drv_to_virtio(vdev->dev.driver); for (i = 0; i < drv->feature_table_size; i++) if (drv->feature_table[i] == fbit) @@ -111,9 +109,8 @@ EXPORT_SYMBOL_GPL(virtio_check_driver_offered_feature); static int virtio_dev_probe(struct device *_d) { int err, i; - struct virtio_device *dev = 
container_of(_d,struct virtio_device,dev); - struct virtio_driver *drv = container_of(dev->dev.driver, - struct virtio_driver, driver); + struct virtio_device *dev = dev_to_virtio(_d); + struct virtio_driver *drv = drv_to_virtio(dev->dev.driver); u32 device_features; /* We have a driver! */ @@ -152,9 +149,8 @@ static int virtio_dev_probe(struct device *_d) static int virtio_dev_remove(struct device *_d) { - struct virtio_device *dev = container_of(_d,struct virtio_device,dev); - struct virtio_driver *drv = container_of(dev->dev.driver, - struct virtio_driver, driver); + struct virtio_device *dev = dev_to_virtio(_d); + struct virtio_driver *drv = drv_to_virtio(dev->dev.driver); drv->remove(dev); diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 2a70558b36ea..d19fe3e323b4 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -139,10 +139,9 @@ static void fill_balloon(struct virtio_balloon *vb, size_t num) struct page *page = balloon_page_enqueue(vb_dev_info); if (!page) { - if (printk_ratelimit()) - dev_printk(KERN_INFO, &vb->vdev->dev, - "Out of puff! Can't get %u pages\n", - VIRTIO_BALLOON_PAGES_PER_PAGE); + dev_info_ratelimited(&vb->vdev->dev, + "Out of puff! Can't get %u pages\n", + VIRTIO_BALLOON_PAGES_PER_PAGE); /* Sleep for at least 1/5 of a second before retry. */ msleep(200); break; diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index 6b1b7e184939..634f80bcdbd7 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -225,7 +225,7 @@ static void vm_notify(struct virtqueue *vq) /* We write the queue's selector into the notification register to * signal the other end */ - writel(virtqueue_get_queue_index(vq), vm_dev->base + VIRTIO_MMIO_QUEUE_NOTIFY); + writel(vq->index, vm_dev->base + VIRTIO_MMIO_QUEUE_NOTIFY); } /* Notify all virtqueues on an interrupt. 
*/ @@ -266,7 +266,7 @@ static void vm_del_vq(struct virtqueue *vq) struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vq->vdev); struct virtio_mmio_vq_info *info = vq->priv; unsigned long flags, size; - unsigned int index = virtqueue_get_queue_index(vq); + unsigned int index = vq->index; spin_lock_irqsave(&vm_dev->lock, flags); list_del(&info->node); @@ -521,25 +521,33 @@ static int vm_cmdline_set(const char *device, int err; struct resource resources[2] = {}; char *str; - long long int base; + long long int base, size; + unsigned int irq; int processed, consumed = 0; struct platform_device *pdev; - resources[0].flags = IORESOURCE_MEM; - resources[1].flags = IORESOURCE_IRQ; - - resources[0].end = memparse(device, &str) - 1; + /* Consume "size" part of the command line parameter */ + size = memparse(device, &str); + /* Get "@<base>:<irq>[:<id>]" chunks */ processed = sscanf(str, "@%lli:%u%n:%d%n", - &base, &resources[1].start, &consumed, + &base, &irq, &consumed, &vm_cmdline_id, &consumed); - if (processed < 2 || processed > 3 || str[consumed]) + /* + * sscanf() must process at least 2 chunks; also there + * must be no extra characters after the last chunk, so + * str[consumed] must be '\0' + */ + if (processed < 2 || str[consumed]) return -EINVAL; + resources[0].flags = IORESOURCE_MEM; resources[0].start = base; - resources[0].end += base; - resources[1].end = resources[1].start; + resources[0].end = base + size - 1; + + resources[1].flags = IORESOURCE_IRQ; + resources[1].start = resources[1].end = irq; if (!vm_cmdline_parent_registered) { err = device_register(&vm_cmdline_parent); diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c index c33aea36598a..e3ecc94591ad 100644 --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -203,8 +203,7 @@ static void vp_notify(struct virtqueue *vq) /* we write the queue's selector into the notification register to * signal the other end */ - iowrite16(virtqueue_get_queue_index(vq), - vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY); + iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY); } /* Handle a configuration change: Tell driver if it wants to know. */ @@ -479,8 +478,7 @@ static void vp_del_vq(struct virtqueue *vq) list_del(&info->node); spin_unlock_irqrestore(&vp_dev->lock, flags); - iowrite16(virtqueue_get_queue_index(vq), - vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); + iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); if (vp_dev->msix_enabled) { iowrite16(VIRTIO_MSI_NO_VECTOR, @@ -830,16 +828,4 @@ static struct pci_driver virtio_pci_driver = { #endif }; -static int __init virtio_pci_init(void) -{ - return pci_register_driver(&virtio_pci_driver); -} - -module_init(virtio_pci_init); - -static void __exit virtio_pci_exit(void) -{ - pci_unregister_driver(&virtio_pci_driver); -} - -module_exit(virtio_pci_exit); +module_pci_driver(virtio_pci_driver); diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index e639584b2dbd..ffd7e7da5d3b 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -93,8 +93,6 @@ struct vring_virtqueue /* Host publishes avail event idx */ bool event; - /* Number of free buffers */ - unsigned int num_free; /* Head of free buffer list. */ unsigned int free_head; /* Number we've added since last sync. */ @@ -106,9 +104,6 @@ struct vring_virtqueue /* How to notify other side. FIXME: commonalize hcalls!
*/ void (*notify)(struct virtqueue *vq); - /* Index of the queue */ - int queue_index; - #ifdef DEBUG /* They're supposed to lock for us. */ unsigned int in_use; @@ -135,6 +130,13 @@ static int vring_add_indirect(struct vring_virtqueue *vq, unsigned head; int i; + /* + * We require lowmem mappings for the descriptors because + * otherwise virt_to_phys will give us bogus addresses in the + * virtqueue. + */ + gfp &= ~(__GFP_HIGHMEM | __GFP_HIGH); + desc = kmalloc((out + in) * sizeof(struct vring_desc), gfp); if (!desc) return -ENOMEM; @@ -160,7 +162,7 @@ static int vring_add_indirect(struct vring_virtqueue *vq, desc[i-1].next = 0; /* We're about to use a buffer */ - vq->num_free--; + vq->vq.num_free--; /* Use a single buffer which doesn't continue */ head = vq->free_head; @@ -174,13 +176,6 @@ static int vring_add_indirect(struct vring_virtqueue *vq, return head; } -int virtqueue_get_queue_index(struct virtqueue *_vq) -{ - struct vring_virtqueue *vq = to_vvq(_vq); - return vq->queue_index; -} -EXPORT_SYMBOL_GPL(virtqueue_get_queue_index); - /** * virtqueue_add_buf - expose buffer to other end * @vq: the struct virtqueue we're talking about. @@ -193,10 +188,7 @@ EXPORT_SYMBOL_GPL(virtqueue_get_queue_index); * Caller must ensure we don't call this with other virtqueue operations * at the same time (except where noted). * - * Returns remaining capacity of queue or a negative error - * (ie. ENOSPC). Note that it only really makes sense to treat all - * positive return values as "available": indirect buffers mean that - * we can put an entire sg[] array inside a single queue entry. + * Returns zero or a negative error (ie. ENOSPC, ENOMEM). */ int virtqueue_add_buf(struct virtqueue *_vq, struct scatterlist sg[], @@ -228,7 +220,7 @@ int virtqueue_add_buf(struct virtqueue *_vq, /* If the host supports indirect descriptor tables, and we have multiple * buffers, then go indirect. FIXME: tune this threshold */ - if (vq->indirect && (out + in) > 1 && vq->num_free) { + if (vq->indirect && (out + in) > 1 && vq->vq.num_free) { head = vring_add_indirect(vq, sg, out, in, gfp); if (likely(head >= 0)) goto add_head; @@ -237,9 +229,9 @@ int virtqueue_add_buf(struct virtqueue *_vq, BUG_ON(out + in > vq->vring.num); BUG_ON(out + in == 0); - if (vq->num_free < out + in) { + if (vq->vq.num_free < out + in) { pr_debug("Can't add buf len %i - avail = %i\n", - out + in, vq->num_free); + out + in, vq->vq.num_free); /* FIXME: for historical reasons, we force a notify here if * there are outgoing parts to the buffer. Presumably the * host should service the ring ASAP. */ @@ -250,7 +242,7 @@ int virtqueue_add_buf(struct virtqueue *_vq, } /* We're about to use some buffers from the free list. 
*/ - vq->num_free -= out + in; + vq->vq.num_free -= out + in; head = vq->free_head; for (i = vq->free_head; out; i = vq->vring.desc[i].next, out--) { @@ -296,7 +288,7 @@ add_head: pr_debug("Added buffer head %i to %p\n", head, vq); END_USE(vq); - return vq->num_free; + return 0; } EXPORT_SYMBOL_GPL(virtqueue_add_buf); @@ -393,13 +385,13 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head) while (vq->vring.desc[i].flags & VRING_DESC_F_NEXT) { i = vq->vring.desc[i].next; - vq->num_free++; + vq->vq.num_free++; } vq->vring.desc[i].next = vq->free_head; vq->free_head = head; /* Plus final descriptor */ - vq->num_free++; + vq->vq.num_free++; } static inline bool more_used(const struct vring_virtqueue *vq) @@ -599,7 +591,7 @@ void *virtqueue_detach_unused_buf(struct virtqueue *_vq) return buf; } /* That should have freed everything. */ - BUG_ON(vq->num_free != vq->vring.num); + BUG_ON(vq->vq.num_free != vq->vring.num); END_USE(vq); return NULL; @@ -653,12 +645,13 @@ struct virtqueue *vring_new_virtqueue(unsigned int index, vq->vq.callback = callback; vq->vq.vdev = vdev; vq->vq.name = name; + vq->vq.num_free = num; + vq->vq.index = index; vq->notify = notify; vq->weak_barriers = weak_barriers; vq->broken = false; vq->last_used_idx = 0; vq->num_added = 0; - vq->queue_index = index; list_add_tail(&vq->vq.list, &vdev->vqs); #ifdef DEBUG vq->in_use = false; @@ -673,7 +666,6 @@ struct virtqueue *vring_new_virtqueue(unsigned int index, vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; /* Put everything in free lists. */ - vq->num_free = num; vq->free_head = 0; for (i = 0; i < num-1; i++) { vq->vring.desc[i].next = i+1;
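The virtio_ring hunks complete the interface change: num_free moves from the private struct vring_virtqueue into the public struct virtqueue, the queue index becomes the vq->index field (retiring virtqueue_get_queue_index()), and virtqueue_add_buf() stops reporting capacity. The old capacity return was easy to misuse — with indirect descriptors an entire sg[] array occupies a single ring entry, so summing per-skb scatterlist counts, as virtio_net's free_old_xmit_skbs() used to do via num_sg, over-estimated the space reclaimed. Drivers now read vq->num_free directly; below is a condensed sketch of the resulting transmit-path idiom, modelled on the virtio_net change above (my_tx_throttle() is a hypothetical helper; struct send_queue and free_old_xmit_skbs() are virtio_net's own):

#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/virtio.h>

/*
 * Illustrative helper, condensed from virtio_net's start_xmit():
 * a worst-case skb needs 2 + MAX_SKB_FRAGS descriptors, so stop the
 * subqueue once the ring can no longer hold one.
 */
static void my_tx_throttle(struct net_device *dev, struct send_queue *sq,
			   int qnum)
{
	if (sq->vq->num_free < 2 + MAX_SKB_FRAGS) {
		netif_stop_subqueue(dev, qnum);
		if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
			/* Completions raced with the stop: reclaim, then recheck. */
			free_old_xmit_skbs(sq);
			if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) {
				netif_start_subqueue(dev, qnum);
				virtqueue_disable_cb(sq->vq);
			}
		}
	}
}

Counting descriptors at the source, rather than reconstructing capacity from return values, is what lets this series delete num_sg from skb_vnet_hdr and turn free_old_xmit_skbs() into a void function.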