summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/admin-guide/device-mapper/delay.rst41
-rw-r--r--Documentation/admin-guide/device-mapper/dm-crypt.rst4
-rw-r--r--Documentation/admin-guide/device-mapper/vdo.rst7
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt17
-rw-r--r--Documentation/arch/loongarch/irq-chip-model.rst32
-rw-r--r--Documentation/arch/s390/vfio-ap.rst30
-rw-r--r--Documentation/core-api/cleanup.rst8
-rw-r--r--Documentation/core-api/index.rst1
-rw-r--r--Documentation/driver-api/cxl/access-coordinates.rst91
-rw-r--r--Documentation/driver-api/cxl/index.rst1
-rw-r--r--Documentation/translations/zh_CN/arch/loongarch/irq-chip-model.rst32
-rw-r--r--Documentation/virt/kvm/api.rst31
-rw-r--r--Documentation/virt/kvm/locking.rst32
-rw-r--r--Documentation/virt/uml/user_mode_linux_howto_v2.rst37
-rw-r--r--Documentation/watchdog/convert_drivers_to_kernel_api.rst1
-rw-r--r--MAINTAINERS11
-rw-r--r--arch/arm64/kvm/arm.c6
-rw-r--r--arch/loongarch/Kconfig7
-rw-r--r--arch/loongarch/include/asm/atomic.h2
-rw-r--r--arch/loongarch/include/asm/cpu-features.h2
-rw-r--r--arch/loongarch/include/asm/cpu.h30
-rw-r--r--arch/loongarch/include/asm/loongarch.h1
-rw-r--r--arch/loongarch/include/asm/mmu_context.h35
-rw-r--r--arch/loongarch/include/asm/percpu.h124
-rw-r--r--arch/loongarch/include/asm/pgtable.h32
-rw-r--r--arch/loongarch/include/asm/set_memory.h21
-rw-r--r--arch/loongarch/include/uapi/asm/hwcap.h1
-rw-r--r--arch/loongarch/include/uapi/asm/sigcontext.h1
-rw-r--r--arch/loongarch/kernel/acpi.c4
-rw-r--r--arch/loongarch/kernel/cpu-probe.c120
-rw-r--r--arch/loongarch/kernel/proc.c10
-rw-r--r--arch/loongarch/kernel/syscall.c4
-rw-r--r--arch/loongarch/kvm/main.c4
-rw-r--r--arch/loongarch/mm/Makefile3
-rw-r--r--arch/loongarch/mm/fault.c41
-rw-r--r--arch/loongarch/mm/pageattr.c218
-rw-r--r--arch/loongarch/pci/acpi.c1
-rw-r--r--arch/loongarch/vdso/vgetrandom-chacha.S92
-rw-r--r--arch/mips/include/asm/kvm_host.h4
-rw-r--r--arch/mips/kvm/mips.c8
-rw-r--r--arch/mips/kvm/vz.c8
-rw-r--r--arch/parisc/kernel/perf.c1
-rw-r--r--arch/riscv/kvm/main.c4
-rw-r--r--arch/s390/configs/debug_defconfig1
-rw-r--r--arch/s390/hypfs/hypfs_dbfs.c1
-rw-r--r--arch/s390/hypfs/inode.c1
-rw-r--r--arch/s390/kernel/debug.c1
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c1
-rw-r--r--arch/s390/kernel/sysinfo.c1
-rw-r--r--arch/s390/kernel/vdso64/vdso_user_wrapper.S14
-rw-r--r--arch/s390/kernel/vdso64/vgetrandom-chacha.S76
-rw-r--r--arch/s390/kvm/kvm-s390.c27
-rw-r--r--arch/s390/pci/pci_clp.c1
-rw-r--r--arch/sh/include/asm/irq.h6
-rw-r--r--arch/um/Kconfig1
-rw-r--r--arch/um/drivers/harddog_kern.c1
-rw-r--r--arch/um/drivers/hostaudio_kern.c2
-rw-r--r--arch/um/drivers/vector_kern.c212
-rw-r--r--arch/um/drivers/vector_kern.h4
-rw-r--r--arch/um/drivers/vector_user.c83
-rw-r--r--arch/um/include/asm/pgtable.h7
-rw-r--r--arch/um/include/asm/processor-generic.h20
-rw-r--r--arch/um/include/asm/sysrq.h8
-rw-r--r--arch/um/include/shared/skas/mm_id.h5
-rw-r--r--arch/um/include/shared/skas/skas.h2
-rw-r--r--arch/um/kernel/exec.c3
-rw-r--r--arch/um/kernel/process.c8
-rw-r--r--arch/um/kernel/reboot.c2
-rw-r--r--arch/um/kernel/skas/mmu.c12
-rw-r--r--arch/um/kernel/skas/process.c4
-rw-r--r--arch/um/kernel/skas/syscall.c34
-rw-r--r--arch/um/kernel/sysrq.c1
-rw-r--r--arch/um/kernel/time.c2
-rw-r--r--arch/um/kernel/tlb.c16
-rw-r--r--arch/um/os-Linux/file.c8
-rw-r--r--arch/um/os-Linux/skas/mem.c2
-rw-r--r--arch/um/os-Linux/skas/process.c2
-rw-r--r--arch/x86/coco/tdx/tdx.c6
-rw-r--r--arch/x86/include/asm/atomic64_32.h6
-rw-r--r--arch/x86/include/asm/cpuid.h1
-rw-r--r--arch/x86/include/asm/intel-family.h5
-rw-r--r--arch/x86/include/asm/kvm-x86-ops.h6
-rw-r--r--arch/x86/include/asm/kvm_host.h32
-rw-r--r--arch/x86/include/asm/msr-index.h34
-rw-r--r--arch/x86/include/asm/pgtable_64.h23
-rw-r--r--arch/x86/include/asm/reboot.h2
-rw-r--r--arch/x86/include/asm/svm.h20
-rw-r--r--arch/x86/include/asm/vmx.h40
-rw-r--r--arch/x86/include/uapi/asm/kvm.h1
-rw-r--r--arch/x86/kernel/cpu/mce/dev-mcelog.c1
-rw-r--r--arch/x86/kernel/cpu/mtrr/mtrr.c6
-rw-r--r--arch/x86/kernel/cpu/resctrl/pseudo_lock.c1
-rw-r--r--arch/x86/kernel/head_64.S20
-rw-r--r--arch/x86/kvm/cpuid.c30
-rw-r--r--arch/x86/kvm/irq.c10
-rw-r--r--arch/x86/kvm/lapic.c84
-rw-r--r--arch/x86/kvm/lapic.h3
-rw-r--r--arch/x86/kvm/mmu.h2
-rw-r--r--arch/x86/kvm/mmu/mmu.c556
-rw-r--r--arch/x86/kvm/mmu/mmu_internal.h5
-rw-r--r--arch/x86/kvm/mmu/mmutrace.h1
-rw-r--r--arch/x86/kvm/mmu/paging_tmpl.h63
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.c6
-rw-r--r--arch/x86/kvm/reverse_cpuid.h8
-rw-r--r--arch/x86/kvm/smm.c24
-rw-r--r--arch/x86/kvm/svm/nested.c4
-rw-r--r--arch/x86/kvm/svm/svm.c87
-rw-r--r--arch/x86/kvm/svm/svm.h18
-rw-r--r--arch/x86/kvm/svm/vmenter.S8
-rw-r--r--arch/x86/kvm/vmx/capabilities.h10
-rw-r--r--arch/x86/kvm/vmx/main.c10
-rw-r--r--arch/x86/kvm/vmx/nested.c134
-rw-r--r--arch/x86/kvm/vmx/nested.h8
-rw-r--r--arch/x86/kvm/vmx/sgx.c2
-rw-r--r--arch/x86/kvm/vmx/vmx.c67
-rw-r--r--arch/x86/kvm/vmx/vmx.h9
-rw-r--r--arch/x86/kvm/vmx/vmx_onhyperv.h8
-rw-r--r--arch/x86/kvm/vmx/vmx_ops.h2
-rw-r--r--arch/x86/kvm/vmx/x86_ops.h7
-rw-r--r--arch/x86/kvm/x86.c1002
-rw-r--r--arch/x86/kvm/x86.h31
-rw-r--r--arch/x86/lib/atomic64_cx8_32.S9
-rw-r--r--arch/x86/mm/pat/memtype.c36
-rw-r--r--arch/x86/platform/pvh/head.S161
-rw-r--r--arch/x86/um/sysrq_32.c1
-rw-r--r--arch/x86/um/sysrq_64.c1
-rw-r--r--arch/x86/xen/enlighten_pvh.c23
-rw-r--r--drivers/acpi/Kconfig2
-rw-r--r--drivers/acpi/apei/einj-cxl.c2
-rw-r--r--drivers/acpi/apei/erst-dbg.c1
-rw-r--r--drivers/acpi/apei/ghes.c2
-rw-r--r--drivers/acpi/pci_irq.c2
-rw-r--r--drivers/ata/libata-scsi.c9
-rw-r--r--drivers/auxdisplay/charlcd.c1
-rw-r--r--drivers/base/attribute_container.c48
-rw-r--r--drivers/base/auxiliary.c2
-rw-r--r--drivers/base/base.h2
-rw-r--r--drivers/base/bus.c19
-rw-r--r--drivers/base/class.c14
-rw-r--r--drivers/base/core.c168
-rw-r--r--drivers/base/dd.c2
-rw-r--r--drivers/base/devres.c2
-rw-r--r--drivers/base/driver.c2
-rw-r--r--drivers/base/firmware_loader/main.c30
-rw-r--r--drivers/base/module.c14
-rw-r--r--drivers/base/platform.c2
-rw-r--r--drivers/block/mtip32xx/mtip32xx.c2
-rw-r--r--drivers/block/pktcdvd.c1
-rw-r--r--drivers/block/ublk_drv.c1
-rw-r--r--drivers/block/zram/zram_drv.c6
-rw-r--r--drivers/bluetooth/hci_vhci.c1
-rw-r--r--drivers/bus/fsl-mc/fsl-mc-bus.c2
-rw-r--r--drivers/bus/moxtet.c2
-rw-r--r--drivers/char/applicom.c1
-rw-r--r--drivers/char/ds1620.c1
-rw-r--r--drivers/char/dtlk.c1
-rw-r--r--drivers/char/hpet.c1
-rw-r--r--drivers/char/ipmi/ipmi_watchdog.c1
-rw-r--r--drivers/char/pc8736x_gpio.c1
-rw-r--r--drivers/char/ppdev.c1
-rw-r--r--drivers/char/scx200_gpio.c1
-rw-r--r--drivers/char/sonypi.c1
-rw-r--r--drivers/char/tpm/tpm-dev.c1
-rw-r--r--drivers/char/tpm/tpm_vtpm_proxy.c1
-rw-r--r--drivers/char/tpm/tpmrm-dev.c1
-rw-r--r--drivers/char/virtio_console.c1
-rw-r--r--drivers/counter/counter-chrdev.c1
-rw-r--r--drivers/cxl/core/cdat.c508
-rw-r--r--drivers/cxl/core/core.h4
-rw-r--r--drivers/cxl/core/mbox.c96
-rw-r--r--drivers/cxl/core/memdev.c41
-rw-r--r--drivers/cxl/core/pci.c164
-rw-r--r--drivers/cxl/core/port.c206
-rw-r--r--drivers/cxl/core/region.c81
-rw-r--r--drivers/cxl/cxl.h9
-rw-r--r--drivers/cxl/cxlmem.h27
-rw-r--r--drivers/cxl/mem.c29
-rw-r--r--drivers/cxl/pci.c91
-rw-r--r--drivers/cxl/pmem.c26
-rw-r--r--drivers/cxl/port.c2
-rw-r--r--drivers/cxl/security.c23
-rw-r--r--drivers/firewire/core-cdev.c1
-rw-r--r--drivers/firmware/arm_scmi/driver.c1
-rw-r--r--drivers/firmware/arm_scmi/raw_mode.c5
-rw-r--r--drivers/firmware/efi/capsule-loader.c1
-rw-r--r--drivers/firmware/efi/test/efi_test.c1
-rw-r--r--drivers/firmware/turris-mox-rwtm.c1
-rw-r--r--drivers/gnss/core.c1
-rw-r--r--drivers/gpio/gpio-mockup.c1
-rw-r--r--drivers/gpio/gpio-sloppy-logic-analyzer.c1
-rw-r--r--drivers/gpio/gpiolib-cdev.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c108
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c64
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c89
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_display.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.c132
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.h23
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c23
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c56
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c19
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v10_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v11_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v6_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v8_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c20
-rw-r--r--drivers/gpu/drm/amd/amdgpu/imu_v11_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v11_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v12_0.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v13_0.c17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc24.c23
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c165
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c24
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c15
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c4
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c30
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c2
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c86
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h2
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c4
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c1
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c9
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc.c41
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc.h14
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_dp_types.h12
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_dsc.h4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_spl_translate.c14
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c9
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c8
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c15
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c71
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c14
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c13
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.c31
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_validation.c7
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c80
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h16
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c21
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c64
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.h19
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/spl/dc_spl.c54
-rw-r--r--drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c85
-rw-r--r--drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h9
-rw-r--r--drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h15
-rw-r--r--drivers/gpu/drm/amd/display/dmub/dmub_srv.h1
-rw-r--r--drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h25
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c1
-rw-r--r--drivers/gpu/drm/amd/display/modules/freesync/freesync.c2
-rw-r--r--drivers/gpu/drm/amd/include/amd_shared.h2
-rw-r--r--drivers/gpu/drm/amd/include/kgd_kfd_interface.h10
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h6
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c6
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c8
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c3
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c6
-rw-r--r--drivers/gpu/drm/drm_file.c1
-rw-r--r--drivers/gpu/drm/i915/display/intel_ddi.c2
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp.c22
-rw-r--r--drivers/gpu/drm/i915/display/intel_psr.c32
-rw-r--r--drivers/gpu/drm/i915/display/intel_psr.h2
-rw-r--r--drivers/gpu/drm/i915/i915_perf.c1
-rw-r--r--drivers/gpu/drm/msm/msm_perf.c1
-rw-r--r--drivers/gpu/drm/msm/msm_rd.c1
-rw-r--r--drivers/gpu/drm/xe/xe_bb.c3
-rw-r--r--drivers/gpu/drm/xe/xe_bo.c14
-rw-r--r--drivers/gpu/drm/xe/xe_bo.h6
-rw-r--r--drivers/gpu/drm/xe/xe_drm_client.c7
-rw-r--r--drivers/gpu/drm/xe/xe_gt_pagefault.c6
-rw-r--r--drivers/gpu/drm/xe/xe_guc.h6
-rw-r--r--drivers/gpu/drm/xe/xe_oa.c1
-rw-r--r--drivers/gpu/drm/xe/xe_vram.c1
-rw-r--r--drivers/hid/uhid.c1
-rw-r--r--drivers/hwmon/asus_atk0110.c1
-rw-r--r--drivers/hwmon/fschmd.c1
-rw-r--r--drivers/hwmon/w83793.c1
-rw-r--r--drivers/hwtracing/coresight/coresight-etb10.c1
-rw-r--r--drivers/hwtracing/coresight/coresight-tmc-core.c1
-rw-r--r--drivers/hwtracing/coresight/ultrasoc-smb.c1
-rw-r--r--drivers/hwtracing/intel_th/msu.c1
-rw-r--r--drivers/hwtracing/stm/core.c1
-rw-r--r--drivers/i2c/i2c-dev.c1
-rw-r--r--drivers/idle/intel_idle.c37
-rw-r--r--drivers/infiniband/core/ucma.c1
-rw-r--r--drivers/infiniband/core/user_mad.c2
-rw-r--r--drivers/infiniband/core/uverbs_main.c4
-rw-r--r--drivers/infiniband/hw/hfi1/fault.c1
-rw-r--r--drivers/infiniband/hw/mlx5/devx.c2
-rw-r--r--drivers/input/evdev.c1
-rw-r--r--drivers/input/joydev.c1
-rw-r--r--drivers/input/keyboard/applespi.c1
-rw-r--r--drivers/input/misc/uinput.c1
-rw-r--r--drivers/input/serio/userio.c1
-rw-r--r--drivers/iommu/iommufd/fault.c1
-rw-r--r--drivers/isdn/capi/capi.c1
-rw-r--r--drivers/isdn/mISDN/timerdev.c1
-rw-r--r--drivers/leds/uleds.c1
-rw-r--r--drivers/macintosh/adb.c1
-rw-r--r--drivers/macintosh/smu.c1
-rw-r--r--drivers/md/dm-bufio.c3
-rw-r--r--drivers/md/dm-cache-target.c6
-rw-r--r--drivers/md/dm-clone-metadata.c5
-rw-r--r--drivers/md/dm-crypt.c47
-rw-r--r--drivers/md/dm-integrity.c326
-rw-r--r--drivers/md/dm-raid.c2
-rw-r--r--drivers/md/dm-rq.c4
-rw-r--r--drivers/md/dm-thin.c2
-rw-r--r--drivers/md/dm-vdo/data-vio.c15
-rw-r--r--drivers/md/dm-vdo/dedupe.c3
-rw-r--r--drivers/md/dm-vdo/dm-vdo-target.c29
-rw-r--r--drivers/md/dm-vdo/indexer/chapter-index.c2
-rw-r--r--drivers/md/dm-vdo/io-submitter.c1
-rw-r--r--drivers/md/dm-vdo/message-stats.c48
-rw-r--r--drivers/md/dm-vdo/message-stats.h1
-rw-r--r--drivers/md/dm-vdo/repair.c41
-rw-r--r--drivers/md/dm-vdo/status-codes.c2
-rw-r--r--drivers/md/dm-vdo/status-codes.h2
-rw-r--r--drivers/md/dm-verity-target.c23
-rw-r--r--drivers/md/dm-verity-verify-sig.c2
-rw-r--r--drivers/md/dm.c11
-rw-r--r--drivers/md/dm.h5
-rw-r--r--drivers/media/cec/core/cec-api.c1
-rw-r--r--drivers/media/mc/mc-devnode.c1
-rw-r--r--drivers/media/rc/lirc_dev.c1
-rw-r--r--drivers/media/usb/uvc/uvc_debugfs.c1
-rw-r--r--drivers/media/v4l2-core/v4l2-dev.c1
-rw-r--r--drivers/message/fusion/mptctl.c3
-rw-r--r--drivers/misc/lis3lv02d/lis3lv02d.c1
-rw-r--r--drivers/misc/mei/main.c1
-rw-r--r--drivers/misc/ntsync.c2
-rw-r--r--drivers/misc/phantom.c1
-rw-r--r--drivers/mmc/core/block.c1
-rw-r--r--drivers/mtd/ubi/cdev.c2
-rw-r--r--drivers/mtd/ubi/debug.c1
-rw-r--r--drivers/net/netdevsim/fib.c1
-rw-r--r--drivers/net/tap.c1
-rw-r--r--drivers/net/tun.c1
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c1
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/d3.c1
-rw-r--r--drivers/platform/chrome/cros_ec_debugfs.c1
-rw-r--r--drivers/platform/chrome/wilco_ec/debugfs.c1
-rw-r--r--drivers/platform/chrome/wilco_ec/event.c1
-rw-r--r--drivers/platform/chrome/wilco_ec/telemetry.c1
-rw-r--r--drivers/platform/surface/surface_aggregator_cdev.c1
-rw-r--r--drivers/platform/surface/surface_dtx.c1
-rw-r--r--drivers/pps/pps.c1
-rw-r--r--drivers/rtc/dev.c1
-rw-r--r--drivers/rtc/rtc-m41t80.c1
-rw-r--r--drivers/s390/char/fs3270.c1
-rw-r--r--drivers/s390/char/sclp_ctl.c1
-rw-r--r--drivers/s390/char/tape_char.c1
-rw-r--r--drivers/s390/char/uvdevice.c1
-rw-r--r--drivers/s390/char/vmcp.c1
-rw-r--r--drivers/s390/char/vmlogrdr.c1
-rw-r--r--drivers/s390/char/zcore.c2
-rw-r--r--drivers/s390/cio/chsc_sch.c1
-rw-r--r--drivers/s390/cio/css.c1
-rw-r--r--drivers/s390/crypto/pkey_api.c1
-rw-r--r--drivers/s390/crypto/vfio_ap_drv.c13
-rw-r--r--drivers/s390/crypto/zcrypt_api.c1
-rw-r--r--drivers/sbus/char/openprom.c1
-rw-r--r--drivers/sbus/char/uctrl.c1
-rw-r--r--drivers/scsi/cxgbi/libcxgbi.h3
-rw-r--r--drivers/scsi/hisi_sas/hisi_sas_v3_hw.c2
-rw-r--r--drivers/scsi/ibmvscsi/ibmvfc.c21
-rw-r--r--drivers/scsi/ibmvscsi/ibmvfc.h2
-rw-r--r--drivers/scsi/lpfc/lpfc_bsg.c3
-rw-r--r--drivers/scsi/lpfc/lpfc_ct.c22
-rw-r--r--drivers/scsi/lpfc/lpfc_disc.h7
-rw-r--r--drivers/scsi/lpfc/lpfc_els.c132
-rw-r--r--drivers/scsi/lpfc/lpfc_hbadisc.c10
-rw-r--r--drivers/scsi/lpfc/lpfc_hw.h21
-rw-r--r--drivers/scsi/lpfc/lpfc_hw4.h3
-rw-r--r--drivers/scsi/lpfc/lpfc_init.c32
-rw-r--r--drivers/scsi/lpfc/lpfc_scsi.c2
-rw-r--r--drivers/scsi/lpfc/lpfc_sli.c52
-rw-r--r--drivers/scsi/lpfc/lpfc_version.h2
-rw-r--r--drivers/scsi/lpfc/lpfc_vport.c43
-rw-r--r--drivers/scsi/megaraid/megaraid_sas_base.c2
-rw-r--r--drivers/scsi/mpi3mr/mpi/mpi30_cnfg.h35
-rw-r--r--drivers/scsi/mpi3mr/mpi/mpi30_image.h13
-rw-r--r--drivers/scsi/mpi3mr/mpi/mpi30_ioc.h8
-rw-r--r--drivers/scsi/mpi3mr/mpi/mpi30_transport.h4
-rw-r--r--drivers/scsi/mpi3mr/mpi3mr.h10
-rw-r--r--drivers/scsi/mpi3mr/mpi3mr_fw.c79
-rw-r--r--drivers/scsi/mpt3sas/mpt3sas_base.c5
-rw-r--r--drivers/scsi/pm8001/pm8001_init.c6
-rw-r--r--drivers/scsi/pm8001/pm80xx_hwi.c2
-rw-r--r--drivers/scsi/pmcraid.c2
-rw-r--r--drivers/scsi/qedf/qedf_io.c2
-rw-r--r--drivers/scsi/scsi_debug.c1
-rw-r--r--drivers/scsi/sd.c32
-rw-r--r--drivers/scsi/sg.c1
-rw-r--r--drivers/scsi/st.c5
-rw-r--r--drivers/scsi/zalon.c2
-rw-r--r--drivers/sh/intc/userimask.c5
-rw-r--r--drivers/spi/spidev.c1
-rw-r--r--drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c1
-rw-r--r--drivers/tty/tty_io.c3
-rw-r--r--drivers/ufs/host/ufs-qcom.c2
-rw-r--r--drivers/usb/gadget/function/f_fs.c2
-rw-r--r--drivers/usb/gadget/legacy/inode.c2
-rw-r--r--drivers/usb/gadget/legacy/raw_gadget.c1
-rw-r--r--drivers/usb/gadget/udc/atmel_usba_udc.c1
-rw-r--r--drivers/usb/misc/ldusb.c1
-rw-r--r--drivers/usb/mon/mon_bin.c1
-rw-r--r--drivers/usb/mon/mon_stat.c1
-rw-r--r--drivers/usb/mon/mon_text.c2
-rw-r--r--drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c2
-rw-r--r--drivers/vfio/pci/mlx5/main.c2
-rw-r--r--drivers/vfio/pci/pds/lm.c2
-rw-r--r--drivers/vfio/pci/qat/main.c2
-rw-r--r--drivers/video/fbdev/core/fbcon.c10
-rw-r--r--drivers/video/fbdev/omap2/omapfb/dss/dss-of.c7
-rw-r--r--drivers/video/fbdev/sis/sis_main.c2
-rw-r--r--drivers/virt/coco/tdx-guest/tdx-guest.c1
-rw-r--r--drivers/watchdog/acquirewdt.c1
-rw-r--r--drivers/watchdog/advantechwdt.c1
-rw-r--r--drivers/watchdog/alim1535_wdt.c1
-rw-r--r--drivers/watchdog/alim7101_wdt.c1
-rw-r--r--drivers/watchdog/at91rm9200_wdt.c1
-rw-r--r--drivers/watchdog/ath79_wdt.c1
-rw-r--r--drivers/watchdog/cpu5wdt.c1
-rw-r--r--drivers/watchdog/cpwd.c1
-rw-r--r--drivers/watchdog/eurotechwdt.c1
-rw-r--r--drivers/watchdog/gef_wdt.c1
-rw-r--r--drivers/watchdog/geodewdt.c1
-rw-r--r--drivers/watchdog/ib700wdt.c1
-rw-r--r--drivers/watchdog/ibmasr.c1
-rw-r--r--drivers/watchdog/indydog.c1
-rw-r--r--drivers/watchdog/it8712f_wdt.c1
-rw-r--r--drivers/watchdog/m54xx_wdt.c1
-rw-r--r--drivers/watchdog/machzwd.c1
-rw-r--r--drivers/watchdog/mixcomwd.c1
-rw-r--r--drivers/watchdog/mtx-1_wdt.c1
-rw-r--r--drivers/watchdog/nv_tco.c1
-rw-r--r--drivers/watchdog/pc87413_wdt.c1
-rw-r--r--drivers/watchdog/pcwd.c2
-rw-r--r--drivers/watchdog/pcwd_pci.c2
-rw-r--r--drivers/watchdog/pcwd_usb.c2
-rw-r--r--drivers/watchdog/pika_wdt.c1
-rw-r--r--drivers/watchdog/rc32434_wdt.c1
-rw-r--r--drivers/watchdog/rdc321x_wdt.c1
-rw-r--r--drivers/watchdog/riowd.c1
-rw-r--r--drivers/watchdog/sa1100_wdt.c1
-rw-r--r--drivers/watchdog/sb_wdog.c1
-rw-r--r--drivers/watchdog/sbc60xxwdt.c1
-rw-r--r--drivers/watchdog/sbc7240_wdt.c1
-rw-r--r--drivers/watchdog/sbc8360.c1
-rw-r--r--drivers/watchdog/sbc_epx_c3.c1
-rw-r--r--drivers/watchdog/sbc_fitpc2_wdt.c1
-rw-r--r--drivers/watchdog/sc1200wdt.c1
-rw-r--r--drivers/watchdog/sc520_wdt.c1
-rw-r--r--drivers/watchdog/sch311x_wdt.c1
-rw-r--r--drivers/watchdog/scx200_wdt.c1
-rw-r--r--drivers/watchdog/smsc37b787_wdt.c1
-rw-r--r--drivers/watchdog/w83877f_wdt.c1
-rw-r--r--drivers/watchdog/w83977f_wdt.c1
-rw-r--r--drivers/watchdog/wafer5823wdt.c1
-rw-r--r--drivers/watchdog/wdrtas.c2
-rw-r--r--drivers/watchdog/wdt.c2
-rw-r--r--drivers/watchdog/wdt285.c1
-rw-r--r--drivers/watchdog/wdt977.c1
-rw-r--r--drivers/watchdog/wdt_pci.c2
-rw-r--r--drivers/xen/Kconfig1
-rw-r--r--drivers/xen/acpi.c50
-rw-r--r--drivers/xen/evtchn.c1
-rw-r--r--drivers/xen/mcelog.c1
-rw-r--r--drivers/xen/pci.c13
-rw-r--r--drivers/xen/privcmd.c32
-rw-r--r--drivers/xen/xen-pciback/conf_space_capability.c2
-rw-r--r--drivers/xen/xen-pciback/pci_stub.c78
-rw-r--r--drivers/xen/xenbus/xenbus_dev_frontend.c1
-rw-r--r--fs/bcachefs/backpointers.c2
-rw-r--r--fs/bcachefs/bcachefs.h3
-rw-r--r--fs/bcachefs/bcachefs_format.h6
-rw-r--r--fs/bcachefs/bkey.h8
-rw-r--r--fs/bcachefs/bkey_methods.c2
-rw-r--r--fs/bcachefs/bkey_methods.h2
-rw-r--r--fs/bcachefs/btree_gc.c8
-rw-r--r--fs/bcachefs/btree_io.c6
-rw-r--r--fs/bcachefs/btree_node_scan.c2
-rw-r--r--fs/bcachefs/btree_trans_commit.c108
-rw-r--r--fs/bcachefs/btree_update.h3
-rw-r--r--fs/bcachefs/chardev.c1
-rw-r--r--fs/bcachefs/data_update.c2
-rw-r--r--fs/bcachefs/disk_accounting.c82
-rw-r--r--fs/bcachefs/disk_accounting.h29
-rw-r--r--fs/bcachefs/disk_accounting_types.h2
-rw-r--r--fs/bcachefs/error.c14
-rw-r--r--fs/bcachefs/error.h2
-rw-r--r--fs/bcachefs/fsck.c295
-rw-r--r--fs/bcachefs/inode.c12
-rw-r--r--fs/bcachefs/inode.h1
-rw-r--r--fs/bcachefs/io_read.c4
-rw-r--r--fs/bcachefs/io_write.c4
-rw-r--r--fs/bcachefs/journal_io.c2
-rw-r--r--fs/bcachefs/logged_ops.c13
-rw-r--r--fs/bcachefs/recovery.c7
-rw-r--r--fs/bcachefs/recovery_passes_types.h2
-rw-r--r--fs/bcachefs/reflink.c2
-rw-r--r--fs/bcachefs/replicas.c18
-rw-r--r--fs/bcachefs/replicas.h2
-rw-r--r--fs/bcachefs/sb-clean.c1
-rw-r--r--fs/bcachefs/sb-downgrade.c9
-rw-r--r--fs/bcachefs/sb-errors.c6
-rw-r--r--fs/bcachefs/sb-errors.h2
-rw-r--r--fs/bcachefs/sb-errors_format.h39
-rw-r--r--fs/bcachefs/six.c12
-rw-r--r--fs/bcachefs/snapshot.c3
-rw-r--r--fs/bcachefs/subvolume.c54
-rw-r--r--fs/bcachefs/super-io.c7
-rw-r--r--fs/bcachefs/tests.c2
-rw-r--r--fs/bcachefs/thread_with_file.c2
-rw-r--r--fs/ceph/addr.c1
-rw-r--r--fs/ceph/caps.c29
-rw-r--r--fs/ceph/dir.c2
-rw-r--r--fs/ceph/inode.c2
-rw-r--r--fs/ceph/mds_client.c25
-rw-r--r--fs/ceph/mds_client.h7
-rw-r--r--fs/ceph/super.c1
-rw-r--r--fs/ceph/super.h7
-rw-r--r--fs/debugfs/file.c1
-rw-r--r--fs/dlm/debug_fs.c1
-rw-r--r--fs/efivarfs/file.c1
-rw-r--r--fs/fsopen.c1
-rw-r--r--fs/fuse/control.c4
-rw-r--r--fs/fuse/dev.c1
-rw-r--r--fs/netfs/internal.h1
-rw-r--r--fs/netfs/misc.c74
-rw-r--r--fs/netfs/write_issue.c12
-rw-r--r--fs/nsfs.c1
-rw-r--r--fs/ocfs2/aops.c5
-rw-r--r--fs/ocfs2/extent_map.c8
-rw-r--r--fs/ocfs2/refcounttree.c26
-rw-r--r--fs/ocfs2/xattr.c11
-rw-r--r--fs/overlayfs/file.c2
-rw-r--r--fs/pipe.c1
-rw-r--r--fs/smb/client/cifsencrypt.c151
-rw-r--r--fs/smb/client/cifsglob.h2
-rw-r--r--fs/smb/client/sess.c2
-rw-r--r--fs/smb/client/smb2misc.c28
-rw-r--r--fs/smb/client/smb2ops.c47
-rw-r--r--fs/smb/client/smb2pdu.c10
-rw-r--r--fs/smb/client/smb2proto.h2
-rw-r--r--fs/smb/client/smb2transport.c32
-rw-r--r--fs/smb/common/smb2pdu.h6
-rw-r--r--fs/smb/server/connection.c2
-rw-r--r--fs/smb/server/ksmbd_netlink.h2
-rw-r--r--fs/smb/server/oplock.c4
-rw-r--r--fs/smb/server/server.c2
-rw-r--r--fs/smb/server/smb2pdu.c35
-rw-r--r--fs/smb/server/smb2pdu.h4
-rw-r--r--fs/smb/server/smb_common.c2
-rw-r--r--fs/smb/server/vfs_cache.h4
-rw-r--r--fs/smb/server/xattr.h2
-rw-r--r--fs/ubifs/debug.c2
-rw-r--r--include/cxl/einj.h (renamed from include/linux/einj-cxl.h)0
-rw-r--r--include/cxl/event.h (renamed from include/linux/cxl-event.h)0
-rw-r--r--include/cxl/mailbox.h28
-rw-r--r--include/linux/acpi.h1
-rw-r--r--include/linux/attribute_container.h6
-rw-r--r--include/linux/auxiliary_bus.h2
-rw-r--r--include/linux/bitmap.h140
-rw-r--r--include/linux/bits.h15
-rw-r--r--include/linux/ceph/osd_client.h2
-rw-r--r--include/linux/cleanup.h136
-rw-r--r--include/linux/compiler.h2
-rw-r--r--include/linux/cpumask.h212
-rw-r--r--include/linux/debugfs.h1
-rw-r--r--include/linux/device-mapper.h1
-rw-r--r--include/linux/device/bus.h6
-rw-r--r--include/linux/device/class.h2
-rw-r--r--include/linux/device/driver.h2
-rw-r--r--include/linux/find.h50
-rw-r--r--include/linux/fs.h1
-rw-r--r--include/linux/fsl/mc.h2
-rw-r--r--include/linux/hugetlb.h10
-rw-r--r--include/linux/kvm_host.h18
-rw-r--r--include/linux/nodemask.h86
-rw-r--r--include/linux/platform_device.h2
-rw-r--r--include/trace/events/dma.h37
-rw-r--r--include/uapi/linux/bits.h3
-rw-r--r--include/uapi/linux/const.h17
-rw-r--r--include/uapi/xen/privcmd.h7
-rw-r--r--include/xen/acpi.h27
-rw-r--r--include/xen/interface/elfnote.h93
-rw-r--r--include/xen/interface/physdev.h17
-rw-r--r--include/xen/pci.h6
-rw-r--r--kernel/bpf/bpf_iter.c1
-rw-r--r--kernel/events/core.c1
-rw-r--r--kernel/jump_label.c34
-rw-r--r--kernel/locking/lockdep.c53
-rw-r--r--kernel/locking/lockdep_proc.c2
-rw-r--r--kernel/locking/rwsem.c22
-rw-r--r--kernel/module/Kconfig77
-rw-r--r--kernel/module/debug_kmemleak.c18
-rw-r--r--kernel/module/sysfs.c63
-rw-r--r--kernel/power/user.c1
-rw-r--r--kernel/relay.c1
-rw-r--r--kernel/static_call_inline.c13
-rw-r--r--kernel/time/posix-clock.c1
-rw-r--r--kernel/trace/rv/rv.c3
-rw-r--r--kernel/trace/rv/rv_reactors.c1
-rw-r--r--kernel/trace/trace.c3
-rw-r--r--lib/list-test.c6
-rw-r--r--lib/test_bits.c34
-rw-r--r--mm/Kconfig1
-rw-r--r--mm/damon/Kconfig2
-rw-r--r--mm/filemap.c4
-rw-r--r--mm/gup.c1
-rw-r--r--mm/huge_memory.c1
-rw-r--r--mm/hugetlb.c17
-rw-r--r--mm/kfence/report.c2
-rw-r--r--mm/memfd.c18
-rw-r--r--mm/memory-tiers.c6
-rw-r--r--mm/migrate.c2
-rw-r--r--net/ceph/messenger.c2
-rw-r--r--net/mac80211/rc80211_minstrel_ht_debugfs.c2
-rw-r--r--net/rfkill/core.c1
-rw-r--r--net/socket.c1
-rw-r--r--net/sunrpc/cache.c4
-rw-r--r--net/sunrpc/rpc_pipe.c1
-rw-r--r--samples/vfio-mdev/mtty.c2
-rw-r--r--scripts/Makefile.modinst2
-rw-r--r--scripts/coccinelle/api/stream_open.cocci1
-rw-r--r--scripts/coccinelle/api/string_choices.cocci259
-rw-r--r--security/tomoyo/Kconfig15
-rw-r--r--security/tomoyo/Makefile8
-rw-r--r--security/tomoyo/common.c14
-rw-r--r--security/tomoyo/common.h72
-rw-r--r--security/tomoyo/domain.c9
-rw-r--r--security/tomoyo/gc.c3
-rw-r--r--security/tomoyo/hooks.h (renamed from security/tomoyo/tomoyo.c)110
-rw-r--r--security/tomoyo/init.c366
-rw-r--r--security/tomoyo/load_policy.c12
-rw-r--r--security/tomoyo/proxy.c82
-rw-r--r--security/tomoyo/securityfs_if.c10
-rw-r--r--security/tomoyo/util.c3
-rw-r--r--sound/core/control.c1
-rw-r--r--sound/core/oss/mixer_oss.c1
-rw-r--r--sound/core/oss/pcm_oss.c1
-rw-r--r--sound/core/pcm_native.c2
-rw-r--r--sound/core/rawmidi.c1
-rw-r--r--sound/core/seq/seq_clientmgr.c1
-rw-r--r--sound/core/timer.c1
-rw-r--r--sound/oss/dmasound/dmasound_core.c3
-rw-r--r--sound/soc/intel/avs/debugfs.c3
-rw-r--r--tools/include/linux/linkage.h4
-rw-r--r--tools/objtool/arch/loongarch/decode.c11
-rw-r--r--tools/objtool/check.c23
-rw-r--r--tools/objtool/include/objtool/elf.h1
-rw-r--r--tools/testing/cxl/Kbuild2
-rw-r--r--tools/testing/cxl/mock_acpi.c2
-rw-r--r--tools/testing/cxl/test/mem.c44
-rw-r--r--tools/testing/cxl/test/mock.c10
-rw-r--r--tools/testing/selftests/kvm/.gitignore4
-rw-r--r--tools/testing/selftests/kvm/Makefile4
-rw-r--r--tools/testing/selftests/kvm/coalesced_io_test.c236
-rw-r--r--tools/testing/selftests/kvm/guest_print_test.c19
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util.h28
-rw-r--r--tools/testing/selftests/kvm/include/s390x/debug_print.h69
-rw-r--r--tools/testing/selftests/kvm/include/s390x/processor.h5
-rw-r--r--tools/testing/selftests/kvm/include/s390x/sie.h240
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/apic.h21
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/hyperv.h18
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/processor.h7
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c85
-rw-r--r--tools/testing/selftests/kvm/lib/s390x/processor.c10
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/hyperv.c67
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/processor.c69
-rw-r--r--tools/testing/selftests/kvm/memslot_modification_stress_test.c19
-rw-r--r--tools/testing/selftests/kvm/memslot_perf_test.c12
-rw-r--r--tools/testing/selftests/kvm/s390x/cmma_test.c7
-rw-r--r--tools/testing/selftests/kvm/s390x/config2
-rw-r--r--tools/testing/selftests/kvm/s390x/debug_test.c4
-rw-r--r--tools/testing/selftests/kvm/s390x/memop.c4
-rw-r--r--tools/testing/selftests/kvm/s390x/tprot.c5
-rw-r--r--tools/testing/selftests/kvm/s390x/ucontrol_test.c332
-rw-r--r--tools/testing/selftests/kvm/set_memory_region_test.c29
-rw-r--r--tools/testing/selftests/kvm/x86_64/debug_regs.c11
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/sev_smoke_test.c32
-rw-r--r--tools/testing/selftests/kvm/x86_64/xapic_state_test.c54
-rw-r--r--tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c1
-rw-r--r--tools/testing/selftests/mm/pagemap_ioctl.c2
-rw-r--r--tools/testing/selftests/vDSO/vdso_standalone_test_x86.c2
-rw-r--r--tools/testing/shared/maple-shared.h4
-rw-r--r--tools/testing/shared/shared.h4
-rw-r--r--tools/testing/shared/shared.mk4
-rw-r--r--tools/testing/shared/xarray-shared.h4
-rw-r--r--virt/kvm/coalesced_mmio.c31
-rw-r--r--virt/kvm/kvm_main.c282
729 files changed, 9950 insertions, 5092 deletions
diff --git a/Documentation/admin-guide/device-mapper/delay.rst b/Documentation/admin-guide/device-mapper/delay.rst
index 917ba8c33359..4d667228e744 100644
--- a/Documentation/admin-guide/device-mapper/delay.rst
+++ b/Documentation/admin-guide/device-mapper/delay.rst
@@ -3,29 +3,52 @@ dm-delay
========
Device-Mapper's "delay" target delays reads and/or writes
-and maps them to different devices.
+and/or flushs and optionally maps them to different devices.
-Parameters::
+Arguments::
<device> <offset> <delay> [<write_device> <write_offset> <write_delay>
[<flush_device> <flush_offset> <flush_delay>]]
-With separate write parameters, the first set is only used for reads.
+Table line has to either have 3, 6 or 9 arguments:
+
+3: apply offset and delay to read, write and flush operations on device
+
+6: apply offset and delay to device, also apply write_offset and write_delay
+ to write and flush operations on optionally different write_device with
+ optionally different sector offset
+
+9: same as 6 arguments plus define flush_offset and flush_delay explicitely
+ on/with optionally different flush_device/flush_offset.
+
Offsets are specified in sectors.
+
Delays are specified in milliseconds.
+
Example scripts
===============
::
-
#!/bin/sh
- # Create device delaying rw operation for 500ms
- echo "0 `blockdev --getsz $1` delay $1 0 500" | dmsetup create delayed
+ #
+ # Create mapped device named "delayed" delaying read, write and flush operations for 500ms.
+ #
+ dmsetup create delayed --table "0 `blockdev --getsz $1` delay $1 0 500"
::
+ #!/bin/sh
+ #
+ # Create mapped device delaying write and flush operations for 400ms and
+ # splitting reads to device $1 but writes and flushs to different device $2
+ # to different offsets of 2048 and 4096 sectors respectively.
+ #
+ dmsetup create delayed --table "0 `blockdev --getsz $1` delay $1 2048 0 $2 4096 400"
+::
#!/bin/sh
- # Create device delaying only write operation for 500ms and
- # splitting reads and writes to different devices $1 $2
- echo "0 `blockdev --getsz $1` delay $1 0 0 $2 0 500" | dmsetup create delayed
+ #
+ # Create mapped device delaying reads for 50ms, writes for 100ms and flushs for 333ms
+ # onto the same backing device at offset 0 sectors.
+ #
+ dmsetup create delayed --table "0 `blockdev --getsz $1` delay $1 0 50 $2 0 100 $1 0 333"
diff --git a/Documentation/admin-guide/device-mapper/dm-crypt.rst b/Documentation/admin-guide/device-mapper/dm-crypt.rst
index 48a48bd09372..9f8139ff97d6 100644
--- a/Documentation/admin-guide/device-mapper/dm-crypt.rst
+++ b/Documentation/admin-guide/device-mapper/dm-crypt.rst
@@ -160,6 +160,10 @@ iv_large_sectors
The <iv_offset> must be multiple of <sector_size> (in 512 bytes units)
if this flag is specified.
+integrity_key_size:<bytes>
+ Use an integrity key of <bytes> size instead of using an integrity key size
+ of the digest size of the used HMAC algorithm.
+
Module parameters::
max_read_size
diff --git a/Documentation/admin-guide/device-mapper/vdo.rst b/Documentation/admin-guide/device-mapper/vdo.rst
index c69ac186863a..a14e6d3e787c 100644
--- a/Documentation/admin-guide/device-mapper/vdo.rst
+++ b/Documentation/admin-guide/device-mapper/vdo.rst
@@ -251,7 +251,12 @@ The messages are:
by the vdostats userspace program to interpret the output
buffer.
- dump:
+ config:
+ Outputs useful vdo configuration information. Mostly used
+ by users who want to recreate a similar VDO volume and
+ want to know the creation configuration used.
+
+ dump:
Dumps many internal structures to the system log. This is
not always safe to run, so it should only be used to debug
a hung vdo. Optional parameters to specify structures to
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index bb48ae24ae69..1518343bbe22 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2677,6 +2677,23 @@
Default is Y (on).
+ kvm.enable_virt_at_load=[KVM,ARM64,LOONGARCH,MIPS,RISCV,X86]
+ If enabled, KVM will enable virtualization in hardware
+ when KVM is loaded, and disable virtualization when KVM
+ is unloaded (if KVM is built as a module).
+
+ If disabled, KVM will dynamically enable and disable
+ virtualization on-demand when creating and destroying
+ VMs, i.e. on the 0=>1 and 1=>0 transitions of the
+ number of VMs.
+
+ Enabling virtualization at module lode avoids potential
+ latency for creation of the 0=>1 VM, as KVM serializes
+ virtualization enabling across all online CPUs. The
+ "cost" of enabling virtualization when KVM is loaded,
+ is that doing so may interfere with using out-of-tree
+ hypervisors that want to "own" virtualization hardware.
+
kvm.enable_vmware_backdoor=[KVM] Support VMware backdoor PV interface.
Default is false (don't support).
diff --git a/Documentation/arch/loongarch/irq-chip-model.rst b/Documentation/arch/loongarch/irq-chip-model.rst
index 7988f4192363..6dd48256e39f 100644
--- a/Documentation/arch/loongarch/irq-chip-model.rst
+++ b/Documentation/arch/loongarch/irq-chip-model.rst
@@ -85,6 +85,38 @@ to CPUINTC directly::
| Devices |
+---------+
+Advanced Extended IRQ model
+===========================
+
+In this model, IPI (Inter-Processor Interrupt) and CPU Local Timer interrupt go
+to CPUINTC directly, CPU UARTS interrupts go to LIOINTC, PCH-MSI interrupts go
+to AVECINTC, and then go to CPUINTC directly, while all other devices interrupts
+go to PCH-PIC/PCH-LPC and gathered by EIOINTC, and then go to CPUINTC directly::
+
+ +-----+ +-----------------------+ +-------+
+ | IPI | --> | CPUINTC | <-- | Timer |
+ +-----+ +-----------------------+ +-------+
+ ^ ^ ^
+ | | |
+ +---------+ +----------+ +---------+ +-------+
+ | EIOINTC | | AVECINTC | | LIOINTC | <-- | UARTs |
+ +---------+ +----------+ +---------+ +-------+
+ ^ ^
+ | |
+ +---------+ +---------+
+ | PCH-PIC | | PCH-MSI |
+ +---------+ +---------+
+ ^ ^ ^
+ | | |
+ +---------+ +---------+ +---------+
+ | Devices | | PCH-LPC | | Devices |
+ +---------+ +---------+ +---------+
+ ^
+ |
+ +---------+
+ | Devices |
+ +---------+
+
ACPI-related definitions
========================
diff --git a/Documentation/arch/s390/vfio-ap.rst b/Documentation/arch/s390/vfio-ap.rst
index ea744cbc8687..eba1991fbdba 100644
--- a/Documentation/arch/s390/vfio-ap.rst
+++ b/Documentation/arch/s390/vfio-ap.rst
@@ -999,6 +999,36 @@ the vfio_ap mediated device to which it is assigned as long as each new APQN
resulting from plugging it in references a queue device bound to the vfio_ap
device driver.
+Driver Features
+===============
+The vfio_ap driver exposes a sysfs file containing supported features.
+This exists so third party tools (like Libvirt and mdevctl) can query the
+availability of specific features.
+
+The features list can be found here: /sys/bus/matrix/devices/matrix/features
+
+Entries are space delimited. Each entry consists of a combination of
+alphanumeric and underscore characters.
+
+Example:
+cat /sys/bus/matrix/devices/matrix/features
+guest_matrix dyn ap_config
+
+the following features are advertised:
+
+---------------+---------------------------------------------------------------+
+| Flag | Description |
++==============+===============================================================+
+| guest_matrix | guest_matrix attribute exists. It reports the matrix of |
+| | adapters and domains that are or will be passed through to a |
+| | guest when the mdev is attached to it. |
++--------------+---------------------------------------------------------------+
+| dyn | Indicates hot plug/unplug of AP adapters, domains and control |
+| | domains for a guest to which the mdev is attached. |
++------------+-----------------------------------------------------------------+
+| ap_config | ap_config interface for one-shot modifications to mdev config |
++--------------+---------------------------------------------------------------+
+
Limitations
===========
Live guest migration is not supported for guests using AP devices without
diff --git a/Documentation/core-api/cleanup.rst b/Documentation/core-api/cleanup.rst
new file mode 100644
index 000000000000..527eb2f8ec6e
--- /dev/null
+++ b/Documentation/core-api/cleanup.rst
@@ -0,0 +1,8 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================
+Scope-based Cleanup Helpers
+===========================
+
+.. kernel-doc:: include/linux/cleanup.h
+ :doc: scope-based cleanup helpers
diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst
index e18a2ffe0787..a331d2c814f5 100644
--- a/Documentation/core-api/index.rst
+++ b/Documentation/core-api/index.rst
@@ -35,6 +35,7 @@ Library functionality that is used throughout the kernel.
kobject
kref
+ cleanup
assoc_array
xarray
maple_tree
diff --git a/Documentation/driver-api/cxl/access-coordinates.rst b/Documentation/driver-api/cxl/access-coordinates.rst
new file mode 100644
index 000000000000..b07950ea30c9
--- /dev/null
+++ b/Documentation/driver-api/cxl/access-coordinates.rst
@@ -0,0 +1,91 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+==================================
+CXL Access Coordinates Computation
+==================================
+
+Shared Upstream Link Calculation
+================================
+For certain CXL region construction with endpoints behind CXL switches (SW) or
+Root Ports (RP), there is the possibility of the total bandwidth for all
+the endpoints behind a switch being more than the switch upstream link.
+A similar situation can occur within the host, upstream of the root ports.
+The CXL driver performs an additional pass after all the targets have
+arrived for a region in order to recalculate the bandwidths with possible
+upstream link being a limiting factor in mind.
+
+The algorithm assumes the configuration is a symmetric topology as that
+maximizes performance. When asymmetric topology is detected, the calculation
+is aborted. An asymmetric topology is detected during topology walk where the
+number of RPs detected as a grandparent is not equal to the number of devices
+iterated in the same iteration loop. The assumption is made that subtle
+asymmetry in properties does not happen and all paths to EPs are equal.
+
+There can be multiple switches under an RP. There can be multiple RPs under
+a CXL Host Bridge (HB). There can be multiple HBs under a CXL Fixed Memory
+Window Structure (CFMWS).
+
+An example hierarchy:
+
+> CFMWS 0
+> |
+> _________|_________
+> | |
+> ACPI0017-0 ACPI0017-1
+> GP0/HB0/ACPI0016-0 GP1/HB1/ACPI0016-1
+> | | | |
+> RP0 RP1 RP2 RP3
+> | | | |
+> SW 0 SW 1 SW 2 SW 3
+> | | | | | | | |
+> EP0 EP1 EP2 EP3 EP4 EP5 EP6 EP7
+
+Computation for the example hierarchy:
+
+Min (GP0 to CPU BW,
+ Min(SW 0 Upstream Link to RP0 BW,
+ Min(SW0SSLBIS for SW0DSP0 (EP0), EP0 DSLBIS, EP0 Upstream Link) +
+ Min(SW0SSLBIS for SW0DSP1 (EP1), EP1 DSLBIS, EP1 Upstream link)) +
+ Min(SW 1 Upstream Link to RP1 BW,
+ Min(SW1SSLBIS for SW1DSP0 (EP2), EP2 DSLBIS, EP2 Upstream Link) +
+ Min(SW1SSLBIS for SW1DSP1 (EP3), EP3 DSLBIS, EP3 Upstream link))) +
+Min (GP1 to CPU BW,
+ Min(SW 2 Upstream Link to RP2 BW,
+ Min(SW2SSLBIS for SW2DSP0 (EP4), EP4 DSLBIS, EP4 Upstream Link) +
+ Min(SW2SSLBIS for SW2DSP1 (EP5), EP5 DSLBIS, EP5 Upstream link)) +
+ Min(SW 3 Upstream Link to RP3 BW,
+ Min(SW3SSLBIS for SW3DSP0 (EP6), EP6 DSLBIS, EP6 Upstream Link) +
+ Min(SW3SSLBIS for SW3DSP1 (EP7), EP7 DSLBIS, EP7 Upstream link))))
+
+The calculation starts at cxl_region_shared_upstream_perf_update(). A xarray
+is created to collect all the endpoint bandwidths via the
+cxl_endpoint_gather_bandwidth() function. The min() of bandwidth from the
+endpoint CDAT and the upstream link bandwidth is calculated. If the endpoint
+has a CXL switch as a parent, then min() of calculated bandwidth and the
+bandwidth from the SSLBIS for the switch downstream port that is associated
+with the endpoint is calculated. The final bandwidth is stored in a
+'struct cxl_perf_ctx' in the xarray indexed by a device pointer. If the
+endpoint is direct attached to a root port (RP), the device pointer would be an
+RP device. If the endpoint is behind a switch, the device pointer would be the
+upstream device of the parent switch.
+
+At the next stage, the code walks through one or more switches if they exist
+in the topology. For endpoints directly attached to RPs, this step is skipped.
+If there is another switch upstream, the code takes the min() of the current
+gathered bandwidth and the upstream link bandwidth. If there's a switch
+upstream, then the SSLBIS of the upstream switch.
+
+Once the topology walk reaches the RP, whether it's direct attached endpoints
+or walking through the switch(es), cxl_rp_gather_bandwidth() is called. At
+this point all the bandwidths are aggregated per each host bridge, which is
+also the index for the resulting xarray.
+
+The next step is to take the min() of the per host bridge bandwidth and the
+bandwidth from the Generic Port (GP). The bandwidths for the GP is retrieved
+via ACPI tables SRAT/HMAT. The min bandwidth are aggregated under the same
+ACPI0017 device to form a new xarray.
+
+Finally, the cxl_region_update_bandwidth() is called and the aggregated
+bandwidth from all the members of the last xarray is updated for the
+access coordinates residing in the cxl region (cxlr) context.
diff --git a/Documentation/driver-api/cxl/index.rst b/Documentation/driver-api/cxl/index.rst
index 12b82725d322..965ba90e8fb7 100644
--- a/Documentation/driver-api/cxl/index.rst
+++ b/Documentation/driver-api/cxl/index.rst
@@ -8,6 +8,7 @@ Compute Express Link
:maxdepth: 1
memory-devices
+ access-coordinates
maturity-map
diff --git a/Documentation/translations/zh_CN/arch/loongarch/irq-chip-model.rst b/Documentation/translations/zh_CN/arch/loongarch/irq-chip-model.rst
index f1e9ab18206c..472761938682 100644
--- a/Documentation/translations/zh_CN/arch/loongarch/irq-chip-model.rst
+++ b/Documentation/translations/zh_CN/arch/loongarch/irq-chip-model.rst
@@ -87,6 +87,38 @@ PCH-LPC/PCH-MSI,然后被EIOINTC统一收集,再直接到达CPUINTC::
| Devices |
+---------+
+高级扩展IRQ模型
+===============
+
+在这种模型里面,IPI(Inter-Processor Interrupt)和CPU本地时钟中断直接发送到CPUINTC,
+CPU串口(UARTs)中断发送到LIOINTC,PCH-MSI中断发送到AVECINTC,而后通过AVECINTC直接
+送达CPUINTC,而其他所有设备的中断则分别发送到所连接的PCH-PIC/PCH-LPC,然后由EIOINTC
+统一收集,再直接到达CPUINTC::
+
+ +-----+ +-----------------------+ +-------+
+ | IPI | --> | CPUINTC | <-- | Timer |
+ +-----+ +-----------------------+ +-------+
+ ^ ^ ^
+ | | |
+ +---------+ +----------+ +---------+ +-------+
+ | EIOINTC | | AVECINTC | | LIOINTC | <-- | UARTs |
+ +---------+ +----------+ +---------+ +-------+
+ ^ ^
+ | |
+ +---------+ +---------+
+ | PCH-PIC | | PCH-MSI |
+ +---------+ +---------+
+ ^ ^ ^
+ | | |
+ +---------+ +---------+ +---------+
+ | Devices | | PCH-LPC | | Devices |
+ +---------+ +---------+ +---------+
+ ^
+ |
+ +---------+
+ | Devices |
+ +---------+
+
ACPI相关的定义
==============
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index b3be87489108..e32471977d0a 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -4214,7 +4214,9 @@ whether or not KVM_CAP_X86_USER_SPACE_MSR's KVM_MSR_EXIT_REASON_FILTER is
enabled. If KVM_MSR_EXIT_REASON_FILTER is enabled, KVM will exit to userspace
on denied accesses, i.e. userspace effectively intercepts the MSR access. If
KVM_MSR_EXIT_REASON_FILTER is not enabled, KVM will inject a #GP into the guest
-on denied accesses.
+on denied accesses. Note, if an MSR access is denied during emulation of MSR
+load/stores during VMX transitions, KVM ignores KVM_MSR_EXIT_REASON_FILTER.
+See the below warning for full details.
If an MSR access is allowed by userspace, KVM will emulate and/or virtualize
the access in accordance with the vCPU model. Note, KVM may still ultimately
@@ -4229,9 +4231,22 @@ filtering. In that mode, ``KVM_MSR_FILTER_DEFAULT_DENY`` is invalid and causes
an error.
.. warning::
- MSR accesses as part of nested VM-Enter/VM-Exit are not filtered.
- This includes both writes to individual VMCS fields and reads/writes
- through the MSR lists pointed to by the VMCS.
+ MSR accesses that are side effects of instruction execution (emulated or
+ native) are not filtered as hardware does not honor MSR bitmaps outside of
+ RDMSR and WRMSR, and KVM mimics that behavior when emulating instructions
+ to avoid pointless divergence from hardware. E.g. RDPID reads MSR_TSC_AUX,
+ SYSENTER reads the SYSENTER MSRs, etc.
+
+ MSRs that are loaded/stored via dedicated VMCS fields are not filtered as
+ part of VM-Enter/VM-Exit emulation.
+
+ MSRs that are loaded/store via VMX's load/store lists _are_ filtered as part
+ of VM-Enter/VM-Exit emulation. If an MSR access is denied on VM-Enter, KVM
+ synthesizes a consistency check VM-Exit(EXIT_REASON_MSR_LOAD_FAIL). If an
+ MSR access is denied on VM-Exit, KVM synthesizes a VM-Abort. In short, KVM
+ extends Intel's architectural list of MSRs that cannot be loaded/saved via
+ the VM-Enter/VM-Exit MSR list. It is platform owner's responsibility to
+ to communicate any such restrictions to their end users.
x2APIC MSR accesses cannot be filtered (KVM silently ignores filters that
cover any x2APIC MSRs).
@@ -8082,6 +8097,14 @@ KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS By default, KVM emulates MONITOR/MWAIT (if
guest CPUID on writes to MISC_ENABLE if
KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT is
disabled.
+
+KVM_X86_QUIRK_SLOT_ZAP_ALL By default, KVM invalidates all SPTEs in
+ fast way for memslot deletion when VM type
+ is KVM_X86_DEFAULT_VM.
+ When this quirk is disabled or when VM type
+ is other than KVM_X86_DEFAULT_VM, KVM zaps
+ only leaf SPTEs that are within the range of
+ the memslot being deleted.
=================================== ============================================
7.32 KVM_CAP_MAX_VCPU_ID
diff --git a/Documentation/virt/kvm/locking.rst b/Documentation/virt/kvm/locking.rst
index 02880d5552d5..20a9a37d1cdd 100644
--- a/Documentation/virt/kvm/locking.rst
+++ b/Documentation/virt/kvm/locking.rst
@@ -11,6 +11,8 @@ The acquisition orders for mutexes are as follows:
- cpus_read_lock() is taken outside kvm_lock
+- kvm_usage_lock is taken outside cpus_read_lock()
+
- kvm->lock is taken outside vcpu->mutex
- kvm->lock is taken outside kvm->slots_lock and kvm->irq_lock
@@ -24,6 +26,13 @@ The acquisition orders for mutexes are as follows:
are taken on the waiting side when modifying memslots, so MMU notifiers
must not take either kvm->slots_lock or kvm->slots_arch_lock.
+cpus_read_lock() vs kvm_lock:
+
+- Taking cpus_read_lock() outside of kvm_lock is problematic, despite that
+ being the official ordering, as it is quite easy to unknowingly trigger
+ cpus_read_lock() while holding kvm_lock. Use caution when walking vm_list,
+ e.g. avoid complex operations when possible.
+
For SRCU:
- ``synchronize_srcu(&kvm->srcu)`` is called inside critical sections
@@ -227,10 +236,16 @@ time it will be set using the Dirty tracking mechanism described above.
:Type: mutex
:Arch: any
:Protects: - vm_list
- - kvm_usage_count
+
+``kvm_usage_lock``
+^^^^^^^^^^^^^^^^^^
+
+:Type: mutex
+:Arch: any
+:Protects: - kvm_usage_count
- hardware virtualization enable/disable
-:Comment: KVM also disables CPU hotplug via cpus_read_lock() during
- enable/disable.
+:Comment: Exists to allow taking cpus_read_lock() while kvm_usage_count is
+ protected, which simplifies the virtualization enabling logic.
``kvm->mn_invalidate_lock``
^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -290,11 +305,12 @@ time it will be set using the Dirty tracking mechanism described above.
wakeup.
``vendor_module_lock``
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+^^^^^^^^^^^^^^^^^^^^^^
:Type: mutex
:Arch: x86
:Protects: loading a vendor module (kvm_amd or kvm_intel)
-:Comment: Exists because using kvm_lock leads to deadlock. cpu_hotplug_lock is
- taken outside of kvm_lock, e.g. in KVM's CPU online/offline callbacks, and
- many operations need to take cpu_hotplug_lock when loading a vendor module,
- e.g. updating static calls.
+:Comment: Exists because using kvm_lock leads to deadlock. kvm_lock is taken
+ in notifiers, e.g. __kvmclock_cpufreq_notifier(), that may be invoked while
+ cpu_hotplug_lock is held, e.g. from cpufreq_boost_trigger_state(), and many
+ operations need to take cpu_hotplug_lock when loading a vendor module, e.g.
+ updating static calls.
diff --git a/Documentation/virt/uml/user_mode_linux_howto_v2.rst b/Documentation/virt/uml/user_mode_linux_howto_v2.rst
index 27942446f406..584000b743f3 100644
--- a/Documentation/virt/uml/user_mode_linux_howto_v2.rst
+++ b/Documentation/virt/uml/user_mode_linux_howto_v2.rst
@@ -217,6 +217,8 @@ remote UML and other VM instances.
+-----------+--------+------------------------------------+------------+
| fd | vector | dependent on fd type | varies |
+-----------+--------+------------------------------------+------------+
+| vde | vector | dep. on VDE VPN: Virt.Net Locator | varies |
++-----------+--------+------------------------------------+------------+
| tuntap | legacy | none | ~ 500Mbit |
+-----------+--------+------------------------------------+------------+
| daemon | legacy | none | ~ 450Mbit |
@@ -573,6 +575,41 @@ https://github.com/NetSys/bess/wiki/Built-In-Modules-and-Ports
BESS transport does not require any special privileges.
+VDE vector transport
+--------------------
+
+Virtual Distributed Ethernet (VDE) is a project whose main goal is to provide a
+highly flexible support for virtual networking.
+
+http://wiki.virtualsquare.org/#/tutorials/vdebasics
+
+Common usages of VDE include fast prototyping and teaching.
+
+Examples:
+
+ ``vecX:transport=vde,vnl=tap://tap0``
+
+use tap0
+
+ ``vecX:transport=vde,vnl=slirp://``
+
+use slirp
+
+ ``vec0:transport=vde,vnl=vde:///tmp/switch``
+
+connect to a vde switch
+
+ ``vecX:transport=\"vde,vnl=cmd://ssh remote.host //tmp/sshlirp\"``
+
+connect to a remote slirp (instant VPN: convert ssh to VPN, it uses sshlirp)
+https://github.com/virtualsquare/sshlirp
+
+ ``vec0:transport=vde,vnl=vxvde://234.0.0.1``
+
+connect to a local area cloud (all the UML nodes using the same
+multicast address running on hosts in the same multicast domain (LAN)
+will be automagically connected together to a virtual LAN.
+
Configuring Legacy transports
=============================
diff --git a/Documentation/watchdog/convert_drivers_to_kernel_api.rst b/Documentation/watchdog/convert_drivers_to_kernel_api.rst
index a1c3f038ce0e..e83609a5d007 100644
--- a/Documentation/watchdog/convert_drivers_to_kernel_api.rst
+++ b/Documentation/watchdog/convert_drivers_to_kernel_api.rst
@@ -75,7 +75,6 @@ Example conversion::
-static const struct file_operations s3c2410wdt_fops = {
- .owner = THIS_MODULE,
- - .llseek = no_llseek,
- .write = s3c2410wdt_write,
- .unlocked_ioctl = s3c2410wdt_ioctl,
- .open = s3c2410wdt_open,
diff --git a/MAINTAINERS b/MAINTAINERS
index 00716c1faff6..c27f3190737f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5728,8 +5728,7 @@ L: linux-cxl@vger.kernel.org
S: Maintained
F: Documentation/driver-api/cxl
F: drivers/cxl/
-F: include/linux/einj-cxl.h
-F: include/linux/cxl-event.h
+F: include/cxl/
F: include/uapi/linux/cxl_mem.h
F: tools/testing/cxl/
@@ -15679,6 +15678,9 @@ F: include/dt-bindings/clock/mobileye,eyeq5-clk.h
MODULE SUPPORT
M: Luis Chamberlain <mcgrof@kernel.org>
+R: Petr Pavlu <petr.pavlu@suse.com>
+R: Sami Tolvanen <samitolvanen@google.com>
+R: Daniel Gomez <da.gomez@samsung.com>
L: linux-modules@vger.kernel.org
L: linux-kernel@vger.kernel.org
S: Maintained
@@ -19345,10 +19347,7 @@ F: drivers/char/random.c
F: include/linux/random.h
F: include/uapi/linux/random.h
F: drivers/virt/vmgenid.c
-F: include/vdso/getrandom.h
-F: lib/vdso/getrandom.c
-F: arch/x86/entry/vdso/vgetrandom*
-F: arch/x86/include/asm/vdso/getrandom*
+N: ^.*/vdso/[^/]*getrandom[^/]+$
RAPIDIO SUBSYSTEM
M: Matt Porter <mporter@kernel.crashing.org>
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index fe0764173cd0..a0d01c46e408 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -2164,7 +2164,7 @@ static void cpu_hyp_uninit(void *discard)
}
}
-int kvm_arch_hardware_enable(void)
+int kvm_arch_enable_virtualization_cpu(void)
{
/*
* Most calls to this function are made with migration
@@ -2184,7 +2184,7 @@ int kvm_arch_hardware_enable(void)
return 0;
}
-void kvm_arch_hardware_disable(void)
+void kvm_arch_disable_virtualization_cpu(void)
{
kvm_timer_cpu_down();
kvm_vgic_cpu_down();
@@ -2380,7 +2380,7 @@ static int __init do_pkvm_init(u32 hyp_va_bits)
/*
* The stub hypercalls are now disabled, so set our local flag to
- * prevent a later re-init attempt in kvm_arch_hardware_enable().
+ * prevent a later re-init attempt in kvm_arch_enable_virtualization_cpu().
*/
__this_cpu_write(kvm_hyp_initialized, 1);
preempt_enable();
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 0eb0436ad4ce..bb35c34f86d2 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -25,6 +25,8 @@ config LOONGARCH
select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
select ARCH_HAS_PTE_DEVMAP
select ARCH_HAS_PTE_SPECIAL
+ select ARCH_HAS_SET_MEMORY
+ select ARCH_HAS_SET_DIRECT_MAP
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_INLINE_READ_LOCK if !PREEMPTION
select ARCH_INLINE_READ_LOCK_BH if !PREEMPTION
@@ -82,6 +84,7 @@ config LOONGARCH
select GENERIC_CMOS_UPDATE
select GENERIC_CPU_AUTOPROBE
select GENERIC_CPU_DEVICES
+ select GENERIC_CPU_VULNERABILITIES
select GENERIC_ENTRY
select GENERIC_GETTIMEOFDAY
select GENERIC_IOREMAP if !ARCH_IOREMAP
@@ -147,7 +150,7 @@ config LOONGARCH
select HAVE_LIVEPATCH
select HAVE_MOD_ARCH_SPECIFIC
select HAVE_NMI
- select HAVE_OBJTOOL if AS_HAS_EXPLICIT_RELOCS && AS_HAS_THIN_ADD_SUB && !CC_IS_CLANG
+ select HAVE_OBJTOOL if AS_HAS_EXPLICIT_RELOCS && AS_HAS_THIN_ADD_SUB
select HAVE_PCI
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
@@ -267,7 +270,7 @@ config AS_HAS_FCSR_CLASS
def_bool $(as-instr,movfcsr2gr \$t0$(comma)\$fcsr0)
config AS_HAS_THIN_ADD_SUB
- def_bool $(cc-option,-Wa$(comma)-mthin-add-sub)
+ def_bool $(cc-option,-Wa$(comma)-mthin-add-sub) || AS_IS_LLVM
config AS_HAS_LSX_EXTENSION
def_bool $(as-instr,vld \$vr0$(comma)\$a0$(comma)0)
diff --git a/arch/loongarch/include/asm/atomic.h b/arch/loongarch/include/asm/atomic.h
index 99af8b3160a8..c86f0ab922ec 100644
--- a/arch/loongarch/include/asm/atomic.h
+++ b/arch/loongarch/include/asm/atomic.h
@@ -15,6 +15,7 @@
#define __LL "ll.w "
#define __SC "sc.w "
#define __AMADD "amadd.w "
+#define __AMOR "amor.w "
#define __AMAND_DB "amand_db.w "
#define __AMOR_DB "amor_db.w "
#define __AMXOR_DB "amxor_db.w "
@@ -22,6 +23,7 @@
#define __LL "ll.d "
#define __SC "sc.d "
#define __AMADD "amadd.d "
+#define __AMOR "amor.d "
#define __AMAND_DB "amand_db.d "
#define __AMOR_DB "amor_db.d "
#define __AMXOR_DB "amxor_db.d "
diff --git a/arch/loongarch/include/asm/cpu-features.h b/arch/loongarch/include/asm/cpu-features.h
index 16a716f88a5c..fc83bb32f9f0 100644
--- a/arch/loongarch/include/asm/cpu-features.h
+++ b/arch/loongarch/include/asm/cpu-features.h
@@ -51,6 +51,7 @@
#define cpu_has_lbt_mips cpu_opt(LOONGARCH_CPU_LBT_MIPS)
#define cpu_has_lbt (cpu_has_lbt_x86|cpu_has_lbt_arm|cpu_has_lbt_mips)
#define cpu_has_csr cpu_opt(LOONGARCH_CPU_CSR)
+#define cpu_has_iocsr cpu_opt(LOONGARCH_CPU_IOCSR)
#define cpu_has_tlb cpu_opt(LOONGARCH_CPU_TLB)
#define cpu_has_watch cpu_opt(LOONGARCH_CPU_WATCH)
#define cpu_has_vint cpu_opt(LOONGARCH_CPU_VINT)
@@ -65,6 +66,7 @@
#define cpu_has_guestid cpu_opt(LOONGARCH_CPU_GUESTID)
#define cpu_has_hypervisor cpu_opt(LOONGARCH_CPU_HYPERVISOR)
#define cpu_has_ptw cpu_opt(LOONGARCH_CPU_PTW)
+#define cpu_has_lspw cpu_opt(LOONGARCH_CPU_LSPW)
#define cpu_has_avecint cpu_opt(LOONGARCH_CPU_AVECINT)
#endif /* __ASM_CPU_FEATURES_H */
diff --git a/arch/loongarch/include/asm/cpu.h b/arch/loongarch/include/asm/cpu.h
index 843f9c4ec980..98cf4d7b4b0a 100644
--- a/arch/loongarch/include/asm/cpu.h
+++ b/arch/loongarch/include/asm/cpu.h
@@ -87,19 +87,21 @@ enum cpu_type_enum {
#define CPU_FEATURE_LBT_MIPS 12 /* CPU has MIPS Binary Translation */
#define CPU_FEATURE_TLB 13 /* CPU has TLB */
#define CPU_FEATURE_CSR 14 /* CPU has CSR */
-#define CPU_FEATURE_WATCH 15 /* CPU has watchpoint registers */
-#define CPU_FEATURE_VINT 16 /* CPU has vectored interrupts */
-#define CPU_FEATURE_CSRIPI 17 /* CPU has CSR-IPI */
-#define CPU_FEATURE_EXTIOI 18 /* CPU has EXT-IOI */
-#define CPU_FEATURE_PREFETCH 19 /* CPU has prefetch instructions */
-#define CPU_FEATURE_PMP 20 /* CPU has perfermance counter */
-#define CPU_FEATURE_SCALEFREQ 21 /* CPU supports cpufreq scaling */
-#define CPU_FEATURE_FLATMODE 22 /* CPU has flat mode */
-#define CPU_FEATURE_EIODECODE 23 /* CPU has EXTIOI interrupt pin decode mode */
-#define CPU_FEATURE_GUESTID 24 /* CPU has GuestID feature */
-#define CPU_FEATURE_HYPERVISOR 25 /* CPU has hypervisor (running in VM) */
-#define CPU_FEATURE_PTW 26 /* CPU has hardware page table walker */
-#define CPU_FEATURE_AVECINT 27 /* CPU has avec interrupt */
+#define CPU_FEATURE_IOCSR 15 /* CPU has IOCSR */
+#define CPU_FEATURE_WATCH 16 /* CPU has watchpoint registers */
+#define CPU_FEATURE_VINT 17 /* CPU has vectored interrupts */
+#define CPU_FEATURE_CSRIPI 18 /* CPU has CSR-IPI */
+#define CPU_FEATURE_EXTIOI 19 /* CPU has EXT-IOI */
+#define CPU_FEATURE_PREFETCH 20 /* CPU has prefetch instructions */
+#define CPU_FEATURE_PMP 21 /* CPU has perfermance counter */
+#define CPU_FEATURE_SCALEFREQ 22 /* CPU supports cpufreq scaling */
+#define CPU_FEATURE_FLATMODE 23 /* CPU has flat mode */
+#define CPU_FEATURE_EIODECODE 24 /* CPU has EXTIOI interrupt pin decode mode */
+#define CPU_FEATURE_GUESTID 25 /* CPU has GuestID feature */
+#define CPU_FEATURE_HYPERVISOR 26 /* CPU has hypervisor (running in VM) */
+#define CPU_FEATURE_PTW 27 /* CPU has hardware page table walker */
+#define CPU_FEATURE_LSPW 28 /* CPU has LSPW (lddir/ldpte instructions) */
+#define CPU_FEATURE_AVECINT 29 /* CPU has AVEC interrupt */
#define LOONGARCH_CPU_CPUCFG BIT_ULL(CPU_FEATURE_CPUCFG)
#define LOONGARCH_CPU_LAM BIT_ULL(CPU_FEATURE_LAM)
@@ -115,6 +117,7 @@ enum cpu_type_enum {
#define LOONGARCH_CPU_LBT_ARM BIT_ULL(CPU_FEATURE_LBT_ARM)
#define LOONGARCH_CPU_LBT_MIPS BIT_ULL(CPU_FEATURE_LBT_MIPS)
#define LOONGARCH_CPU_TLB BIT_ULL(CPU_FEATURE_TLB)
+#define LOONGARCH_CPU_IOCSR BIT_ULL(CPU_FEATURE_IOCSR)
#define LOONGARCH_CPU_CSR BIT_ULL(CPU_FEATURE_CSR)
#define LOONGARCH_CPU_WATCH BIT_ULL(CPU_FEATURE_WATCH)
#define LOONGARCH_CPU_VINT BIT_ULL(CPU_FEATURE_VINT)
@@ -128,6 +131,7 @@ enum cpu_type_enum {
#define LOONGARCH_CPU_GUESTID BIT_ULL(CPU_FEATURE_GUESTID)
#define LOONGARCH_CPU_HYPERVISOR BIT_ULL(CPU_FEATURE_HYPERVISOR)
#define LOONGARCH_CPU_PTW BIT_ULL(CPU_FEATURE_PTW)
+#define LOONGARCH_CPU_LSPW BIT_ULL(CPU_FEATURE_LSPW)
#define LOONGARCH_CPU_AVECINT BIT_ULL(CPU_FEATURE_AVECINT)
#endif /* _ASM_CPU_H */
diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
index 04bf1a7f903a..26542413a5b0 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -62,6 +62,7 @@
#define LOONGARCH_CPUCFG1 0x1
#define CPUCFG1_ISGR32 BIT(0)
#define CPUCFG1_ISGR64 BIT(1)
+#define CPUCFG1_ISA GENMASK(1, 0)
#define CPUCFG1_PAGING BIT(2)
#define CPUCFG1_IOCSR BIT(3)
#define CPUCFG1_PABITS GENMASK(11, 4)
diff --git a/arch/loongarch/include/asm/mmu_context.h b/arch/loongarch/include/asm/mmu_context.h
index 9f97c3453b9c..304363bd3935 100644
--- a/arch/loongarch/include/asm/mmu_context.h
+++ b/arch/loongarch/include/asm/mmu_context.h
@@ -49,12 +49,12 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
/* Normal, classic get_new_mmu_context */
static inline void
-get_new_mmu_context(struct mm_struct *mm, unsigned long cpu)
+get_new_mmu_context(struct mm_struct *mm, unsigned long cpu, bool *need_flush)
{
u64 asid = asid_cache(cpu);
if (!((++asid) & cpu_asid_mask(&cpu_data[cpu])))
- local_flush_tlb_user(); /* start new asid cycle */
+ *need_flush = true; /* start new asid cycle */
cpu_context(cpu, mm) = asid_cache(cpu) = asid;
}
@@ -74,21 +74,34 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm)
return 0;
}
+static inline void atomic_update_pgd_asid(unsigned long asid, unsigned long pgdl)
+{
+ __asm__ __volatile__(
+ "csrwr %[pgdl_val], %[pgdl_reg] \n\t"
+ "csrwr %[asid_val], %[asid_reg] \n\t"
+ : [asid_val] "+r" (asid), [pgdl_val] "+r" (pgdl)
+ : [asid_reg] "i" (LOONGARCH_CSR_ASID), [pgdl_reg] "i" (LOONGARCH_CSR_PGDL)
+ : "memory"
+ );
+}
+
static inline void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
+ bool need_flush = false;
unsigned int cpu = smp_processor_id();
/* Check if our ASID is of an older version and thus invalid */
if (!asid_valid(next, cpu))
- get_new_mmu_context(next, cpu);
-
- write_csr_asid(cpu_asid(cpu, next));
+ get_new_mmu_context(next, cpu, &need_flush);
if (next != &init_mm)
- csr_write64((unsigned long)next->pgd, LOONGARCH_CSR_PGDL);
+ atomic_update_pgd_asid(cpu_asid(cpu, next), (unsigned long)next->pgd);
else
- csr_write64((unsigned long)invalid_pg_dir, LOONGARCH_CSR_PGDL);
+ atomic_update_pgd_asid(cpu_asid(cpu, next), (unsigned long)invalid_pg_dir);
+
+ if (need_flush)
+ local_flush_tlb_user(); /* Flush tlb after update ASID */
/*
* Mark current->active_mm as not "active" anymore.
@@ -135,9 +148,15 @@ drop_mmu_context(struct mm_struct *mm, unsigned int cpu)
asid = read_csr_asid() & cpu_asid_mask(&current_cpu_data);
if (asid == cpu_asid(cpu, mm)) {
+ bool need_flush = false;
+
if (!current->mm || (current->mm == mm)) {
- get_new_mmu_context(mm, cpu);
+ get_new_mmu_context(mm, cpu, &need_flush);
+
write_csr_asid(cpu_asid(cpu, mm));
+ if (need_flush)
+ local_flush_tlb_user(); /* Flush tlb after update ASID */
+
goto out;
}
}
diff --git a/arch/loongarch/include/asm/percpu.h b/arch/loongarch/include/asm/percpu.h
index 8f290e5546cf..87be9b14e9da 100644
--- a/arch/loongarch/include/asm/percpu.h
+++ b/arch/loongarch/include/asm/percpu.h
@@ -68,75 +68,6 @@ PERCPU_OP(and, and, &)
PERCPU_OP(or, or, |)
#undef PERCPU_OP
-static __always_inline unsigned long __percpu_read(void __percpu *ptr, int size)
-{
- unsigned long ret;
-
- switch (size) {
- case 1:
- __asm__ __volatile__ ("ldx.b %[ret], $r21, %[ptr] \n"
- : [ret] "=&r"(ret)
- : [ptr] "r"(ptr)
- : "memory");
- break;
- case 2:
- __asm__ __volatile__ ("ldx.h %[ret], $r21, %[ptr] \n"
- : [ret] "=&r"(ret)
- : [ptr] "r"(ptr)
- : "memory");
- break;
- case 4:
- __asm__ __volatile__ ("ldx.w %[ret], $r21, %[ptr] \n"
- : [ret] "=&r"(ret)
- : [ptr] "r"(ptr)
- : "memory");
- break;
- case 8:
- __asm__ __volatile__ ("ldx.d %[ret], $r21, %[ptr] \n"
- : [ret] "=&r"(ret)
- : [ptr] "r"(ptr)
- : "memory");
- break;
- default:
- ret = 0;
- BUILD_BUG();
- }
-
- return ret;
-}
-
-static __always_inline void __percpu_write(void __percpu *ptr, unsigned long val, int size)
-{
- switch (size) {
- case 1:
- __asm__ __volatile__("stx.b %[val], $r21, %[ptr] \n"
- :
- : [val] "r" (val), [ptr] "r" (ptr)
- : "memory");
- break;
- case 2:
- __asm__ __volatile__("stx.h %[val], $r21, %[ptr] \n"
- :
- : [val] "r" (val), [ptr] "r" (ptr)
- : "memory");
- break;
- case 4:
- __asm__ __volatile__("stx.w %[val], $r21, %[ptr] \n"
- :
- : [val] "r" (val), [ptr] "r" (ptr)
- : "memory");
- break;
- case 8:
- __asm__ __volatile__("stx.d %[val], $r21, %[ptr] \n"
- :
- : [val] "r" (val), [ptr] "r" (ptr)
- : "memory");
- break;
- default:
- BUILD_BUG();
- }
-}
-
static __always_inline unsigned long __percpu_xchg(void *ptr, unsigned long val, int size)
{
switch (size) {
@@ -157,6 +88,33 @@ static __always_inline unsigned long __percpu_xchg(void *ptr, unsigned long val,
return 0;
}
+#define __pcpu_op_1(op) op ".b "
+#define __pcpu_op_2(op) op ".h "
+#define __pcpu_op_4(op) op ".w "
+#define __pcpu_op_8(op) op ".d "
+
+#define _percpu_read(size, _pcp) \
+({ \
+ typeof(_pcp) __pcp_ret; \
+ \
+ __asm__ __volatile__( \
+ __pcpu_op_##size("ldx") "%[ret], $r21, %[ptr] \n" \
+ : [ret] "=&r"(__pcp_ret) \
+ : [ptr] "r"(&(_pcp)) \
+ : "memory"); \
+ \
+ __pcp_ret; \
+})
+
+#define _percpu_write(size, _pcp, _val) \
+do { \
+ __asm__ __volatile__( \
+ __pcpu_op_##size("stx") "%[val], $r21, %[ptr] \n" \
+ : \
+ : [val] "r"(_val), [ptr] "r"(&(_pcp)) \
+ : "memory"); \
+} while (0)
+
/* this_cpu_cmpxchg */
#define _protect_cmpxchg_local(pcp, o, n) \
({ \
@@ -167,18 +125,6 @@ static __always_inline unsigned long __percpu_xchg(void *ptr, unsigned long val,
__ret; \
})
-#define _percpu_read(pcp) \
-({ \
- typeof(pcp) __retval; \
- __retval = (typeof(pcp))__percpu_read(&(pcp), sizeof(pcp)); \
- __retval; \
-})
-
-#define _percpu_write(pcp, val) \
-do { \
- __percpu_write(&(pcp), (unsigned long)(val), sizeof(pcp)); \
-} while (0) \
-
#define _pcp_protect(operation, pcp, val) \
({ \
typeof(pcp) __retval; \
@@ -215,15 +161,15 @@ do { \
#define this_cpu_or_4(pcp, val) _percpu_or(pcp, val)
#define this_cpu_or_8(pcp, val) _percpu_or(pcp, val)
-#define this_cpu_read_1(pcp) _percpu_read(pcp)
-#define this_cpu_read_2(pcp) _percpu_read(pcp)
-#define this_cpu_read_4(pcp) _percpu_read(pcp)
-#define this_cpu_read_8(pcp) _percpu_read(pcp)
+#define this_cpu_read_1(pcp) _percpu_read(1, pcp)
+#define this_cpu_read_2(pcp) _percpu_read(2, pcp)
+#define this_cpu_read_4(pcp) _percpu_read(4, pcp)
+#define this_cpu_read_8(pcp) _percpu_read(8, pcp)
-#define this_cpu_write_1(pcp, val) _percpu_write(pcp, val)
-#define this_cpu_write_2(pcp, val) _percpu_write(pcp, val)
-#define this_cpu_write_4(pcp, val) _percpu_write(pcp, val)
-#define this_cpu_write_8(pcp, val) _percpu_write(pcp, val)
+#define this_cpu_write_1(pcp, val) _percpu_write(1, pcp, val)
+#define this_cpu_write_2(pcp, val) _percpu_write(2, pcp, val)
+#define this_cpu_write_4(pcp, val) _percpu_write(4, pcp, val)
+#define this_cpu_write_8(pcp, val) _percpu_write(8, pcp, val)
#define this_cpu_xchg_1(pcp, val) _percpu_xchg(pcp, val)
#define this_cpu_xchg_2(pcp, val) _percpu_xchg(pcp, val)
diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
index 85431f20a14d..9965f52ef65b 100644
--- a/arch/loongarch/include/asm/pgtable.h
+++ b/arch/loongarch/include/asm/pgtable.h
@@ -331,29 +331,23 @@ static inline void set_pte(pte_t *ptep, pte_t pteval)
* Make sure the buddy is global too (if it's !none,
* it better already be global)
*/
+ if (pte_none(ptep_get(buddy))) {
#ifdef CONFIG_SMP
- /*
- * For SMP, multiple CPUs can race, so we need to do
- * this atomically.
- */
- unsigned long page_global = _PAGE_GLOBAL;
- unsigned long tmp;
-
- __asm__ __volatile__ (
- "1:" __LL "%[tmp], %[buddy] \n"
- " bnez %[tmp], 2f \n"
- " or %[tmp], %[tmp], %[global] \n"
- __SC "%[tmp], %[buddy] \n"
- " beqz %[tmp], 1b \n"
- " nop \n"
- "2: \n"
- __WEAK_LLSC_MB
- : [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp)
- : [global] "r" (page_global));
+ /*
+ * For SMP, multiple CPUs can race, so we need
+ * to do this atomically.
+ */
+ __asm__ __volatile__(
+ __AMOR "$zero, %[global], %[buddy] \n"
+ : [buddy] "+ZB" (buddy->pte)
+ : [global] "r" (_PAGE_GLOBAL)
+ : "memory");
+
+ DBAR(0b11000); /* o_wrw = 0b11000 */
#else /* !CONFIG_SMP */
- if (pte_none(ptep_get(buddy)))
WRITE_ONCE(*buddy, __pte(pte_val(ptep_get(buddy)) | _PAGE_GLOBAL));
#endif /* CONFIG_SMP */
+ }
}
}
diff --git a/arch/loongarch/include/asm/set_memory.h b/arch/loongarch/include/asm/set_memory.h
new file mode 100644
index 000000000000..d70505b6676c
--- /dev/null
+++ b/arch/loongarch/include/asm/set_memory.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2024 Loongson Technology Corporation Limited
+ */
+
+#ifndef _ASM_LOONGARCH_SET_MEMORY_H
+#define _ASM_LOONGARCH_SET_MEMORY_H
+
+/*
+ * Functions to change memory attributes.
+ */
+int set_memory_x(unsigned long addr, int numpages);
+int set_memory_nx(unsigned long addr, int numpages);
+int set_memory_ro(unsigned long addr, int numpages);
+int set_memory_rw(unsigned long addr, int numpages);
+
+bool kernel_page_present(struct page *page);
+int set_direct_map_default_noflush(struct page *page);
+int set_direct_map_invalid_noflush(struct page *page);
+
+#endif /* _ASM_LOONGARCH_SET_MEMORY_H */
diff --git a/arch/loongarch/include/uapi/asm/hwcap.h b/arch/loongarch/include/uapi/asm/hwcap.h
index 6955a7cb2c65..2b34e56cfa9e 100644
--- a/arch/loongarch/include/uapi/asm/hwcap.h
+++ b/arch/loongarch/include/uapi/asm/hwcap.h
@@ -17,5 +17,6 @@
#define HWCAP_LOONGARCH_LBT_ARM (1 << 11)
#define HWCAP_LOONGARCH_LBT_MIPS (1 << 12)
#define HWCAP_LOONGARCH_PTW (1 << 13)
+#define HWCAP_LOONGARCH_LSPW (1 << 14)
#endif /* _UAPI_ASM_HWCAP_H */
diff --git a/arch/loongarch/include/uapi/asm/sigcontext.h b/arch/loongarch/include/uapi/asm/sigcontext.h
index 6c22f616b8f1..5cd121275bac 100644
--- a/arch/loongarch/include/uapi/asm/sigcontext.h
+++ b/arch/loongarch/include/uapi/asm/sigcontext.h
@@ -9,7 +9,6 @@
#define _UAPI_ASM_SIGCONTEXT_H
#include <linux/types.h>
-#include <linux/posix_types.h>
/* FP context was used */
#define SC_USED_FP (1 << 0)
diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c
index 929a497c987e..f1a74b80f22c 100644
--- a/arch/loongarch/kernel/acpi.c
+++ b/arch/loongarch/kernel/acpi.c
@@ -9,6 +9,7 @@
#include <linux/init.h>
#include <linux/acpi.h>
+#include <linux/efi-bgrt.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/memblock.h>
@@ -212,6 +213,9 @@ void __init acpi_boot_table_init(void)
/* Do not enable ACPI SPCR console by default */
acpi_parse_spcr(earlycon_acpi_spcr_enable, false);
+ if (IS_ENABLED(CONFIG_ACPI_BGRT))
+ acpi_table_parse(ACPI_SIG_BGRT, acpi_parse_bgrt);
+
return;
fdt_earlycon:
diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c
index 14f0449f5452..cbce099037b2 100644
--- a/arch/loongarch/kernel/cpu-probe.c
+++ b/arch/loongarch/kernel/cpu-probe.c
@@ -91,12 +91,30 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c)
unsigned int config;
unsigned long asid_mask;
- c->options = LOONGARCH_CPU_CPUCFG | LOONGARCH_CPU_CSR |
- LOONGARCH_CPU_TLB | LOONGARCH_CPU_VINT | LOONGARCH_CPU_WATCH;
+ c->options = LOONGARCH_CPU_CPUCFG | LOONGARCH_CPU_CSR | LOONGARCH_CPU_VINT;
elf_hwcap = HWCAP_LOONGARCH_CPUCFG;
config = read_cpucfg(LOONGARCH_CPUCFG1);
+
+ switch (config & CPUCFG1_ISA) {
+ case 0:
+ set_isa(c, LOONGARCH_CPU_ISA_LA32R);
+ break;
+ case 1:
+ set_isa(c, LOONGARCH_CPU_ISA_LA32S);
+ break;
+ case 2:
+ set_isa(c, LOONGARCH_CPU_ISA_LA64);
+ break;
+ default:
+ pr_warn("Warning: unknown ISA level\n");
+ }
+
+ if (config & CPUCFG1_PAGING)
+ c->options |= LOONGARCH_CPU_TLB;
+ if (config & CPUCFG1_IOCSR)
+ c->options |= LOONGARCH_CPU_IOCSR;
if (config & CPUCFG1_UAL) {
c->options |= LOONGARCH_CPU_UAL;
elf_hwcap |= HWCAP_LOONGARCH_UAL;
@@ -139,6 +157,10 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c)
c->options |= LOONGARCH_CPU_PTW;
elf_hwcap |= HWCAP_LOONGARCH_PTW;
}
+ if (config & CPUCFG2_LSPW) {
+ c->options |= LOONGARCH_CPU_LSPW;
+ elf_hwcap |= HWCAP_LOONGARCH_LSPW;
+ }
if (config & CPUCFG2_LVZP) {
c->options |= LOONGARCH_CPU_LVZ;
elf_hwcap |= HWCAP_LOONGARCH_LVZ;
@@ -162,22 +184,6 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c)
if (config & CPUCFG6_PMP)
c->options |= LOONGARCH_CPU_PMP;
- config = iocsr_read32(LOONGARCH_IOCSR_FEATURES);
- if (config & IOCSRF_CSRIPI)
- c->options |= LOONGARCH_CPU_CSRIPI;
- if (config & IOCSRF_EXTIOI)
- c->options |= LOONGARCH_CPU_EXTIOI;
- if (config & IOCSRF_FREQSCALE)
- c->options |= LOONGARCH_CPU_SCALEFREQ;
- if (config & IOCSRF_FLATMODE)
- c->options |= LOONGARCH_CPU_FLATMODE;
- if (config & IOCSRF_EIODECODE)
- c->options |= LOONGARCH_CPU_EIODECODE;
- if (config & IOCSRF_AVEC)
- c->options |= LOONGARCH_CPU_AVECINT;
- if (config & IOCSRF_VM)
- c->options |= LOONGARCH_CPU_HYPERVISOR;
-
config = csr_read32(LOONGARCH_CSR_ASID);
config = (config & CSR_ASID_BIT) >> CSR_ASID_BIT_SHIFT;
asid_mask = GENMASK(config - 1, 0);
@@ -210,6 +216,9 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c)
default:
pr_warn("Warning: unknown TLB type\n");
}
+
+ if (get_num_brps() + get_num_wrps())
+ c->options |= LOONGARCH_CPU_WATCH;
}
#define MAX_NAME_LEN 32
@@ -220,52 +229,67 @@ static char cpu_full_name[MAX_NAME_LEN] = " - ";
static inline void cpu_probe_loongson(struct cpuinfo_loongarch *c, unsigned int cpu)
{
+ uint32_t config;
uint64_t *vendor = (void *)(&cpu_full_name[VENDOR_OFFSET]);
uint64_t *cpuname = (void *)(&cpu_full_name[CPUNAME_OFFSET]);
+ const char *core_name = "Unknown";
- if (!__cpu_full_name[cpu])
- __cpu_full_name[cpu] = cpu_full_name;
-
- *vendor = iocsr_read64(LOONGARCH_IOCSR_VENDOR);
- *cpuname = iocsr_read64(LOONGARCH_IOCSR_CPUNAME);
-
- switch (c->processor_id & PRID_SERIES_MASK) {
- case PRID_SERIES_LA132:
+ switch (BIT(fls(c->isa_level) - 1)) {
+ case LOONGARCH_CPU_ISA_LA32R:
+ case LOONGARCH_CPU_ISA_LA32S:
c->cputype = CPU_LOONGSON32;
- set_isa(c, LOONGARCH_CPU_ISA_LA32S);
__cpu_family[cpu] = "Loongson-32bit";
- pr_info("32-bit Loongson Processor probed (LA132 Core)\n");
break;
- case PRID_SERIES_LA264:
+ case LOONGARCH_CPU_ISA_LA64:
c->cputype = CPU_LOONGSON64;
- set_isa(c, LOONGARCH_CPU_ISA_LA64);
__cpu_family[cpu] = "Loongson-64bit";
- pr_info("64-bit Loongson Processor probed (LA264 Core)\n");
+ break;
+ }
+
+ switch (c->processor_id & PRID_SERIES_MASK) {
+ case PRID_SERIES_LA132:
+ core_name = "LA132";
+ break;
+ case PRID_SERIES_LA264:
+ core_name = "LA264";
break;
case PRID_SERIES_LA364:
- c->cputype = CPU_LOONGSON64;
- set_isa(c, LOONGARCH_CPU_ISA_LA64);
- __cpu_family[cpu] = "Loongson-64bit";
- pr_info("64-bit Loongson Processor probed (LA364 Core)\n");
+ core_name = "LA364";
break;
case PRID_SERIES_LA464:
- c->cputype = CPU_LOONGSON64;
- set_isa(c, LOONGARCH_CPU_ISA_LA64);
- __cpu_family[cpu] = "Loongson-64bit";
- pr_info("64-bit Loongson Processor probed (LA464 Core)\n");
+ core_name = "LA464";
break;
case PRID_SERIES_LA664:
- c->cputype = CPU_LOONGSON64;
- set_isa(c, LOONGARCH_CPU_ISA_LA64);
- __cpu_family[cpu] = "Loongson-64bit";
- pr_info("64-bit Loongson Processor probed (LA664 Core)\n");
+ core_name = "LA664";
break;
- default: /* Default to 64 bit */
- c->cputype = CPU_LOONGSON64;
- set_isa(c, LOONGARCH_CPU_ISA_LA64);
- __cpu_family[cpu] = "Loongson-64bit";
- pr_info("64-bit Loongson Processor probed (Unknown Core)\n");
}
+
+ pr_info("%s Processor probed (%s Core)\n", __cpu_family[cpu], core_name);
+
+ if (!cpu_has_iocsr)
+ return;
+
+ if (!__cpu_full_name[cpu])
+ __cpu_full_name[cpu] = cpu_full_name;
+
+ *vendor = iocsr_read64(LOONGARCH_IOCSR_VENDOR);
+ *cpuname = iocsr_read64(LOONGARCH_IOCSR_CPUNAME);
+
+ config = iocsr_read32(LOONGARCH_IOCSR_FEATURES);
+ if (config & IOCSRF_CSRIPI)
+ c->options |= LOONGARCH_CPU_CSRIPI;
+ if (config & IOCSRF_EXTIOI)
+ c->options |= LOONGARCH_CPU_EXTIOI;
+ if (config & IOCSRF_FREQSCALE)
+ c->options |= LOONGARCH_CPU_SCALEFREQ;
+ if (config & IOCSRF_FLATMODE)
+ c->options |= LOONGARCH_CPU_FLATMODE;
+ if (config & IOCSRF_EIODECODE)
+ c->options |= LOONGARCH_CPU_EIODECODE;
+ if (config & IOCSRF_AVEC)
+ c->options |= LOONGARCH_CPU_AVECINT;
+ if (config & IOCSRF_VM)
+ c->options |= LOONGARCH_CPU_HYPERVISOR;
}
#ifdef CONFIG_64BIT
diff --git a/arch/loongarch/kernel/proc.c b/arch/loongarch/kernel/proc.c
index 0d33cbc47e51..6ce46d92f1f1 100644
--- a/arch/loongarch/kernel/proc.c
+++ b/arch/loongarch/kernel/proc.c
@@ -31,6 +31,7 @@ int proc_cpuinfo_notifier_call_chain(unsigned long val, void *v)
static int show_cpuinfo(struct seq_file *m, void *v)
{
unsigned long n = (unsigned long) v - 1;
+ unsigned int isa = cpu_data[n].isa_level;
unsigned int version = cpu_data[n].processor_id & 0xff;
unsigned int fp_version = cpu_data[n].fpu_vers;
struct proc_cpuinfo_notifier_args proc_cpuinfo_notifier_args;
@@ -64,9 +65,11 @@ static int show_cpuinfo(struct seq_file *m, void *v)
cpu_pabits + 1, cpu_vabits + 1);
seq_printf(m, "ISA\t\t\t:");
- if (cpu_has_loongarch32)
- seq_printf(m, " loongarch32");
- if (cpu_has_loongarch64)
+ if (isa & LOONGARCH_CPU_ISA_LA32R)
+ seq_printf(m, " loongarch32r");
+ if (isa & LOONGARCH_CPU_ISA_LA32S)
+ seq_printf(m, " loongarch32s");
+ if (isa & LOONGARCH_CPU_ISA_LA64)
seq_printf(m, " loongarch64");
seq_printf(m, "\n");
@@ -81,6 +84,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
if (cpu_has_complex) seq_printf(m, " complex");
if (cpu_has_crypto) seq_printf(m, " crypto");
if (cpu_has_ptw) seq_printf(m, " ptw");
+ if (cpu_has_lspw) seq_printf(m, " lspw");
if (cpu_has_lvz) seq_printf(m, " lvz");
if (cpu_has_lbt_x86) seq_printf(m, " lbt_x86");
if (cpu_has_lbt_arm) seq_printf(m, " lbt_arm");
diff --git a/arch/loongarch/kernel/syscall.c b/arch/loongarch/kernel/syscall.c
index ba5d0930a74f..168bd97540f8 100644
--- a/arch/loongarch/kernel/syscall.c
+++ b/arch/loongarch/kernel/syscall.c
@@ -79,7 +79,3 @@ void noinstr __no_stack_protector do_syscall(struct pt_regs *regs)
syscall_exit_to_user_mode(regs);
}
-
-#ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET
-STACK_FRAME_NON_STANDARD(do_syscall);
-#endif
diff --git a/arch/loongarch/kvm/main.c b/arch/loongarch/kvm/main.c
index 844736b99d38..27e9b94c0a0b 100644
--- a/arch/loongarch/kvm/main.c
+++ b/arch/loongarch/kvm/main.c
@@ -261,7 +261,7 @@ long kvm_arch_dev_ioctl(struct file *filp,
return -ENOIOCTLCMD;
}
-int kvm_arch_hardware_enable(void)
+int kvm_arch_enable_virtualization_cpu(void)
{
unsigned long env, gcfg = 0;
@@ -300,7 +300,7 @@ int kvm_arch_hardware_enable(void)
return 0;
}
-void kvm_arch_hardware_disable(void)
+void kvm_arch_disable_virtualization_cpu(void)
{
write_csr_gcfg(0);
write_csr_gstat(0);
diff --git a/arch/loongarch/mm/Makefile b/arch/loongarch/mm/Makefile
index e4d1e581dbae..278be2c8fc36 100644
--- a/arch/loongarch/mm/Makefile
+++ b/arch/loongarch/mm/Makefile
@@ -4,7 +4,8 @@
#
obj-y += init.o cache.o tlb.o tlbex.o extable.o \
- fault.o ioremap.o maccess.o mmap.o pgtable.o page.o
+ fault.o ioremap.o maccess.o mmap.o pgtable.o \
+ page.o pageattr.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_KASAN) += kasan_init.o
diff --git a/arch/loongarch/mm/fault.c b/arch/loongarch/mm/fault.c
index 97b40defde06..deefd9617d00 100644
--- a/arch/loongarch/mm/fault.c
+++ b/arch/loongarch/mm/fault.c
@@ -31,11 +31,52 @@
int show_unhandled_signals = 1;
+static int __kprobes spurious_fault(unsigned long write, unsigned long address)
+{
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ if (!(address & __UA_LIMIT))
+ return 0;
+
+ pgd = pgd_offset_k(address);
+ if (!pgd_present(pgdp_get(pgd)))
+ return 0;
+
+ p4d = p4d_offset(pgd, address);
+ if (!p4d_present(p4dp_get(p4d)))
+ return 0;
+
+ pud = pud_offset(p4d, address);
+ if (!pud_present(pudp_get(pud)))
+ return 0;
+
+ pmd = pmd_offset(pud, address);
+ if (!pmd_present(pmdp_get(pmd)))
+ return 0;
+
+ if (pmd_leaf(*pmd)) {
+ return write ? pmd_write(pmdp_get(pmd)) : 1;
+ } else {
+ pte = pte_offset_kernel(pmd, address);
+ if (!pte_present(ptep_get(pte)))
+ return 0;
+
+ return write ? pte_write(ptep_get(pte)) : 1;
+ }
+}
+
static void __kprobes no_context(struct pt_regs *regs,
unsigned long write, unsigned long address)
{
const int field = sizeof(unsigned long) * 2;
+ if (spurious_fault(write, address))
+ return;
+
/* Are we prepared to handle this kernel fault? */
if (fixup_exception(regs))
return;
diff --git a/arch/loongarch/mm/pageattr.c b/arch/loongarch/mm/pageattr.c
new file mode 100644
index 000000000000..ffd8d76021d4
--- /dev/null
+++ b/arch/loongarch/mm/pageattr.c
@@ -0,0 +1,218 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2024 Loongson Technology Corporation Limited
+ */
+
+#include <linux/pagewalk.h>
+#include <linux/pgtable.h>
+#include <asm/set_memory.h>
+#include <asm/tlbflush.h>
+
+struct pageattr_masks {
+ pgprot_t set_mask;
+ pgprot_t clear_mask;
+};
+
+static unsigned long set_pageattr_masks(unsigned long val, struct mm_walk *walk)
+{
+ unsigned long new_val = val;
+ struct pageattr_masks *masks = walk->private;
+
+ new_val &= ~(pgprot_val(masks->clear_mask));
+ new_val |= (pgprot_val(masks->set_mask));
+
+ return new_val;
+}
+
+static int pageattr_pgd_entry(pgd_t *pgd, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ pgd_t val = pgdp_get(pgd);
+
+ if (pgd_leaf(val)) {
+ val = __pgd(set_pageattr_masks(pgd_val(val), walk));
+ set_pgd(pgd, val);
+ }
+
+ return 0;
+}
+
+static int pageattr_p4d_entry(p4d_t *p4d, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ p4d_t val = p4dp_get(p4d);
+
+ if (p4d_leaf(val)) {
+ val = __p4d(set_pageattr_masks(p4d_val(val), walk));
+ set_p4d(p4d, val);
+ }
+
+ return 0;
+}
+
+static int pageattr_pud_entry(pud_t *pud, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ pud_t val = pudp_get(pud);
+
+ if (pud_leaf(val)) {
+ val = __pud(set_pageattr_masks(pud_val(val), walk));
+ set_pud(pud, val);
+ }
+
+ return 0;
+}
+
+static int pageattr_pmd_entry(pmd_t *pmd, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ pmd_t val = pmdp_get(pmd);
+
+ if (pmd_leaf(val)) {
+ val = __pmd(set_pageattr_masks(pmd_val(val), walk));
+ set_pmd(pmd, val);
+ }
+
+ return 0;
+}
+
+static int pageattr_pte_entry(pte_t *pte, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ pte_t val = ptep_get(pte);
+
+ val = __pte(set_pageattr_masks(pte_val(val), walk));
+ set_pte(pte, val);
+
+ return 0;
+}
+
+static int pageattr_pte_hole(unsigned long addr, unsigned long next,
+ int depth, struct mm_walk *walk)
+{
+ return 0;
+}
+
+static const struct mm_walk_ops pageattr_ops = {
+ .pgd_entry = pageattr_pgd_entry,
+ .p4d_entry = pageattr_p4d_entry,
+ .pud_entry = pageattr_pud_entry,
+ .pmd_entry = pageattr_pmd_entry,
+ .pte_entry = pageattr_pte_entry,
+ .pte_hole = pageattr_pte_hole,
+ .walk_lock = PGWALK_RDLOCK,
+};
+
+static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask, pgprot_t clear_mask)
+{
+ int ret;
+ unsigned long start = addr;
+ unsigned long end = start + PAGE_SIZE * numpages;
+ struct pageattr_masks masks = {
+ .set_mask = set_mask,
+ .clear_mask = clear_mask
+ };
+
+ if (!numpages)
+ return 0;
+
+ mmap_write_lock(&init_mm);
+ ret = walk_page_range_novma(&init_mm, start, end, &pageattr_ops, NULL, &masks);
+ mmap_write_unlock(&init_mm);
+
+ flush_tlb_kernel_range(start, end);
+
+ return ret;
+}
+
+int set_memory_x(unsigned long addr, int numpages)
+{
+ if (addr < vm_map_base)
+ return 0;
+
+ return __set_memory(addr, numpages, __pgprot(0), __pgprot(_PAGE_NO_EXEC));
+}
+
+int set_memory_nx(unsigned long addr, int numpages)
+{
+ if (addr < vm_map_base)
+ return 0;
+
+ return __set_memory(addr, numpages, __pgprot(_PAGE_NO_EXEC), __pgprot(0));
+}
+
+int set_memory_ro(unsigned long addr, int numpages)
+{
+ if (addr < vm_map_base)
+ return 0;
+
+ return __set_memory(addr, numpages, __pgprot(0), __pgprot(_PAGE_WRITE | _PAGE_DIRTY));
+}
+
+int set_memory_rw(unsigned long addr, int numpages)
+{
+ if (addr < vm_map_base)
+ return 0;
+
+ return __set_memory(addr, numpages, __pgprot(_PAGE_WRITE | _PAGE_DIRTY), __pgprot(0));
+}
+
+bool kernel_page_present(struct page *page)
+{
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+ unsigned long addr = (unsigned long)page_address(page);
+
+ if (addr < vm_map_base)
+ return true;
+
+ pgd = pgd_offset_k(addr);
+ if (pgd_none(pgdp_get(pgd)))
+ return false;
+ if (pgd_leaf(pgdp_get(pgd)))
+ return true;
+
+ p4d = p4d_offset(pgd, addr);
+ if (p4d_none(p4dp_get(p4d)))
+ return false;
+ if (p4d_leaf(p4dp_get(p4d)))
+ return true;
+
+ pud = pud_offset(p4d, addr);
+ if (pud_none(pudp_get(pud)))
+ return false;
+ if (pud_leaf(pudp_get(pud)))
+ return true;
+
+ pmd = pmd_offset(pud, addr);
+ if (pmd_none(pmdp_get(pmd)))
+ return false;
+ if (pmd_leaf(pmdp_get(pmd)))
+ return true;
+
+ pte = pte_offset_kernel(pmd, addr);
+ return pte_present(ptep_get(pte));
+}
+
+int set_direct_map_default_noflush(struct page *page)
+{
+ unsigned long addr = (unsigned long)page_address(page);
+
+ if (addr < vm_map_base)
+ return 0;
+
+ return __set_memory(addr, 1, PAGE_KERNEL, __pgprot(0));
+}
+
+int set_direct_map_invalid_noflush(struct page *page)
+{
+ unsigned long addr = (unsigned long)page_address(page);
+
+ if (addr < vm_map_base)
+ return 0;
+
+ return __set_memory(addr, 1, __pgprot(0), __pgprot(_PAGE_PRESENT | _PAGE_VALID));
+}
diff --git a/arch/loongarch/pci/acpi.c b/arch/loongarch/pci/acpi.c
index 365f7de771cb..1da4dc46df43 100644
--- a/arch/loongarch/pci/acpi.c
+++ b/arch/loongarch/pci/acpi.c
@@ -225,6 +225,7 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
if (bus) {
memcpy(bus->sysdata, info->cfg, sizeof(struct pci_config_window));
kfree(info);
+ kfree(root_ops);
} else {
struct pci_bus *child;
diff --git a/arch/loongarch/vdso/vgetrandom-chacha.S b/arch/loongarch/vdso/vgetrandom-chacha.S
index 7e86a50f6e85..c2733e6c3a8d 100644
--- a/arch/loongarch/vdso/vgetrandom-chacha.S
+++ b/arch/loongarch/vdso/vgetrandom-chacha.S
@@ -9,23 +9,11 @@
.text
-/* Salsa20 quarter-round */
-.macro QR a b c d
- add.w \a, \a, \b
- xor \d, \d, \a
- rotri.w \d, \d, 16
-
- add.w \c, \c, \d
- xor \b, \b, \c
- rotri.w \b, \b, 20
-
- add.w \a, \a, \b
- xor \d, \d, \a
- rotri.w \d, \d, 24
-
- add.w \c, \c, \d
- xor \b, \b, \c
- rotri.w \b, \b, 25
+.macro OP_4REG op d0 d1 d2 d3 s0 s1 s2 s3
+ \op \d0, \d0, \s0
+ \op \d1, \d1, \s1
+ \op \d2, \d2, \s2
+ \op \d3, \d3, \s3
.endm
/*
@@ -74,6 +62,23 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack)
/* Reuse i as copy3 */
#define copy3 i
+/* Packs to be used with OP_4REG */
+#define line0 state0, state1, state2, state3
+#define line1 state4, state5, state6, state7
+#define line2 state8, state9, state10, state11
+#define line3 state12, state13, state14, state15
+
+#define line1_perm state5, state6, state7, state4
+#define line2_perm state10, state11, state8, state9
+#define line3_perm state15, state12, state13, state14
+
+#define copy copy0, copy1, copy2, copy3
+
+#define _16 16, 16, 16, 16
+#define _20 20, 20, 20, 20
+#define _24 24, 24, 24, 24
+#define _25 25, 25, 25, 25
+
/*
* The ABI requires s0-s9 saved, and sp aligned to 16-byte.
* This does not violate the stack-less requirement: no sensitive data
@@ -126,16 +131,38 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack)
li.w i, 10
.Lpermute:
/* odd round */
- QR state0, state4, state8, state12
- QR state1, state5, state9, state13
- QR state2, state6, state10, state14
- QR state3, state7, state11, state15
+ OP_4REG add.w line0, line1
+ OP_4REG xor line3, line0
+ OP_4REG rotri.w line3, _16
+
+ OP_4REG add.w line2, line3
+ OP_4REG xor line1, line2
+ OP_4REG rotri.w line1, _20
+
+ OP_4REG add.w line0, line1
+ OP_4REG xor line3, line0
+ OP_4REG rotri.w line3, _24
+
+ OP_4REG add.w line2, line3
+ OP_4REG xor line1, line2
+ OP_4REG rotri.w line1, _25
/* even round */
- QR state0, state5, state10, state15
- QR state1, state6, state11, state12
- QR state2, state7, state8, state13
- QR state3, state4, state9, state14
+ OP_4REG add.w line0, line1_perm
+ OP_4REG xor line3_perm, line0
+ OP_4REG rotri.w line3_perm, _16
+
+ OP_4REG add.w line2_perm, line3_perm
+ OP_4REG xor line1_perm, line2_perm
+ OP_4REG rotri.w line1_perm, _20
+
+ OP_4REG add.w line0, line1_perm
+ OP_4REG xor line3_perm, line0
+ OP_4REG rotri.w line3_perm, _24
+
+ OP_4REG add.w line2_perm, line3_perm
+ OP_4REG xor line1_perm, line2_perm
+ OP_4REG rotri.w line1_perm, _25
addi.w i, i, -1
bnez i, .Lpermute
@@ -147,10 +174,7 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack)
li.w copy3, 0x6b206574
/* output[0,1,2,3] = copy[0,1,2,3] + state[0,1,2,3] */
- add.w state0, state0, copy0
- add.w state1, state1, copy1
- add.w state2, state2, copy2
- add.w state3, state3, copy3
+ OP_4REG add.w line0, copy
st.w state0, output, 0
st.w state1, output, 4
st.w state2, output, 8
@@ -165,10 +189,7 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack)
ld.w state3, key, 12
/* output[4,5,6,7] = state[0,1,2,3] + state[4,5,6,7] */
- add.w state4, state4, state0
- add.w state5, state5, state1
- add.w state6, state6, state2
- add.w state7, state7, state3
+ OP_4REG add.w line1, line0
st.w state4, output, 16
st.w state5, output, 20
st.w state6, output, 24
@@ -181,10 +202,7 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack)
ld.w state3, key, 28
/* output[8,9,10,11] = state[0,1,2,3] + state[8,9,10,11] */
- add.w state8, state8, state0
- add.w state9, state9, state1
- add.w state10, state10, state2
- add.w state11, state11, state3
+ OP_4REG add.w line2, line0
st.w state8, output, 32
st.w state9, output, 36
st.w state10, output, 40
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 6743a57c1ab4..f7222eb594ea 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -728,8 +728,8 @@ struct kvm_mips_callbacks {
int (*handle_fpe)(struct kvm_vcpu *vcpu);
int (*handle_msa_disabled)(struct kvm_vcpu *vcpu);
int (*handle_guest_exit)(struct kvm_vcpu *vcpu);
- int (*hardware_enable)(void);
- void (*hardware_disable)(void);
+ int (*enable_virtualization_cpu)(void);
+ void (*disable_virtualization_cpu)(void);
int (*check_extension)(struct kvm *kvm, long ext);
int (*vcpu_init)(struct kvm_vcpu *vcpu);
void (*vcpu_uninit)(struct kvm_vcpu *vcpu);
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index b5de770b092e..60b43ea85c12 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -125,14 +125,14 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
return 1;
}
-int kvm_arch_hardware_enable(void)
+int kvm_arch_enable_virtualization_cpu(void)
{
- return kvm_mips_callbacks->hardware_enable();
+ return kvm_mips_callbacks->enable_virtualization_cpu();
}
-void kvm_arch_hardware_disable(void)
+void kvm_arch_disable_virtualization_cpu(void)
{
- kvm_mips_callbacks->hardware_disable();
+ kvm_mips_callbacks->disable_virtualization_cpu();
}
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
diff --git a/arch/mips/kvm/vz.c b/arch/mips/kvm/vz.c
index 99d5a71e4300..ccab4d76b126 100644
--- a/arch/mips/kvm/vz.c
+++ b/arch/mips/kvm/vz.c
@@ -2869,7 +2869,7 @@ static unsigned int kvm_vz_resize_guest_vtlb(unsigned int size)
return ret + 1;
}
-static int kvm_vz_hardware_enable(void)
+static int kvm_vz_enable_virtualization_cpu(void)
{
unsigned int mmu_size, guest_mmu_size, ftlb_size;
u64 guest_cvmctl, cvmvmconfig;
@@ -2983,7 +2983,7 @@ static int kvm_vz_hardware_enable(void)
return 0;
}
-static void kvm_vz_hardware_disable(void)
+static void kvm_vz_disable_virtualization_cpu(void)
{
u64 cvmvmconfig;
unsigned int mmu_size;
@@ -3280,8 +3280,8 @@ static struct kvm_mips_callbacks kvm_vz_callbacks = {
.handle_msa_disabled = kvm_trap_vz_handle_msa_disabled,
.handle_guest_exit = kvm_trap_vz_handle_guest_exit,
- .hardware_enable = kvm_vz_hardware_enable,
- .hardware_disable = kvm_vz_hardware_disable,
+ .enable_virtualization_cpu = kvm_vz_enable_virtualization_cpu,
+ .disable_virtualization_cpu = kvm_vz_disable_virtualization_cpu,
.check_extension = kvm_vz_check_extension,
.vcpu_init = kvm_vz_vcpu_init,
.vcpu_uninit = kvm_vz_vcpu_uninit,
diff --git a/arch/parisc/kernel/perf.c b/arch/parisc/kernel/perf.c
index b0f0816879df..5e8e37a722ef 100644
--- a/arch/parisc/kernel/perf.c
+++ b/arch/parisc/kernel/perf.c
@@ -466,7 +466,6 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
}
static const struct file_operations perf_fops = {
- .llseek = no_llseek,
.read = perf_read,
.write = perf_write,
.unlocked_ioctl = perf_ioctl,
diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c
index bab2ec34cd87..f3427f6de608 100644
--- a/arch/riscv/kvm/main.c
+++ b/arch/riscv/kvm/main.c
@@ -20,7 +20,7 @@ long kvm_arch_dev_ioctl(struct file *filp,
return -EINVAL;
}
-int kvm_arch_hardware_enable(void)
+int kvm_arch_enable_virtualization_cpu(void)
{
csr_write(CSR_HEDELEG, KVM_HEDELEG_DEFAULT);
csr_write(CSR_HIDELEG, KVM_HIDELEG_DEFAULT);
@@ -35,7 +35,7 @@ int kvm_arch_hardware_enable(void)
return 0;
}
-void kvm_arch_hardware_disable(void)
+void kvm_arch_disable_virtualization_cpu(void)
{
kvm_riscv_aia_disable();
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 7ec1b8cd0de9..9b57add02cd5 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -59,6 +59,7 @@ CONFIG_CMM=m
CONFIG_APPLDATA_BASE=y
CONFIG_S390_HYPFS_FS=y
CONFIG_KVM=m
+CONFIG_KVM_S390_UCONTROL=y
CONFIG_S390_UNWIND_SELFTEST=m
CONFIG_S390_KPROBES_SANITY_TEST=m
CONFIG_S390_MODULES_SANITY_TEST=m
diff --git a/arch/s390/hypfs/hypfs_dbfs.c b/arch/s390/hypfs/hypfs_dbfs.c
index 0e855c5e91c5..5d9effb0867c 100644
--- a/arch/s390/hypfs/hypfs_dbfs.c
+++ b/arch/s390/hypfs/hypfs_dbfs.c
@@ -76,7 +76,6 @@ static long dbfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
static const struct file_operations dbfs_ops = {
.read = dbfs_read,
- .llseek = no_llseek,
.unlocked_ioctl = dbfs_ioctl,
};
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index 858beaf4a8cb..d428635abf08 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -443,7 +443,6 @@ static const struct file_operations hypfs_file_ops = {
.release = hypfs_release,
.read_iter = hypfs_read_iter,
.write_iter = hypfs_write_iter,
- .llseek = no_llseek,
};
static struct file_system_type hypfs_type = {
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index bce50ca75ea7..e62bea9ab21e 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -163,7 +163,6 @@ static const struct file_operations debug_file_ops = {
.write = debug_input,
.open = debug_open,
.release = debug_close,
- .llseek = no_llseek,
};
static struct dentry *debug_debugfs_root_entry;
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 18b0d025f3a2..e2e0aa463fbd 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -1698,7 +1698,6 @@ static const struct file_operations cfset_fops = {
.release = cfset_release,
.unlocked_ioctl = cfset_ioctl,
.compat_ioctl = cfset_ioctl,
- .llseek = no_llseek
};
static struct miscdevice cfset_dev = {
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
index 2be30a96696a..88055f58fbda 100644
--- a/arch/s390/kernel/sysinfo.c
+++ b/arch/s390/kernel/sysinfo.c
@@ -498,7 +498,6 @@ static const struct file_operations stsi_##fc##_##s1##_##s2##_fs_ops = { \
.open = stsi_open_##fc##_##s1##_##s2, \
.release = stsi_release, \
.read = stsi_read, \
- .llseek = no_llseek, \
};
static int stsi_release(struct inode *inode, struct file *file)
diff --git a/arch/s390/kernel/vdso64/vdso_user_wrapper.S b/arch/s390/kernel/vdso64/vdso_user_wrapper.S
index e26e68675c08..aa06c85bcbd3 100644
--- a/arch/s390/kernel/vdso64/vdso_user_wrapper.S
+++ b/arch/s390/kernel/vdso64/vdso_user_wrapper.S
@@ -13,10 +13,7 @@
* for details.
*/
.macro vdso_func func
- .globl __kernel_\func
- .type __kernel_\func,@function
- __ALIGN
-__kernel_\func:
+SYM_FUNC_START(__kernel_\func)
CFI_STARTPROC
aghi %r15,-STACK_FRAME_VDSO_OVERHEAD
CFI_DEF_CFA_OFFSET (STACK_FRAME_USER_OVERHEAD + STACK_FRAME_VDSO_OVERHEAD)
@@ -32,7 +29,7 @@ __kernel_\func:
CFI_RESTORE 15
br %r14
CFI_ENDPROC
- .size __kernel_\func,.-__kernel_\func
+SYM_FUNC_END(__kernel_\func)
.endm
vdso_func gettimeofday
@@ -41,16 +38,13 @@ vdso_func clock_gettime
vdso_func getcpu
.macro vdso_syscall func,syscall
- .globl __kernel_\func
- .type __kernel_\func,@function
- __ALIGN
-__kernel_\func:
+SYM_FUNC_START(__kernel_\func)
CFI_STARTPROC
svc \syscall
/* Make sure we notice when a syscall returns, which shouldn't happen */
.word 0
CFI_ENDPROC
- .size __kernel_\func,.-__kernel_\func
+SYM_FUNC_END(__kernel_\func)
.endm
vdso_syscall restart_syscall,__NR_restart_syscall
diff --git a/arch/s390/kernel/vdso64/vgetrandom-chacha.S b/arch/s390/kernel/vdso64/vgetrandom-chacha.S
index d802b0a96f41..09c034c2f853 100644
--- a/arch/s390/kernel/vdso64/vgetrandom-chacha.S
+++ b/arch/s390/kernel/vdso64/vgetrandom-chacha.S
@@ -1,7 +1,9 @@
/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/stringify.h>
#include <linux/linkage.h>
#include <asm/alternative.h>
+#include <asm/dwarf.h>
#include <asm/fpu-insn.h>
#define STATE0 %v0
@@ -12,9 +14,6 @@
#define COPY1 %v5
#define COPY2 %v6
#define COPY3 %v7
-#define PERM4 %v16
-#define PERM8 %v17
-#define PERM12 %v18
#define BEPERM %v19
#define TMP0 %v20
#define TMP1 %v21
@@ -23,13 +22,11 @@
.section .rodata
- .balign 128
-.Lconstants:
+ .balign 32
+SYM_DATA_START_LOCAL(chacha20_constants)
.long 0x61707865,0x3320646e,0x79622d32,0x6b206574 # endian-neutral
- .long 0x04050607,0x08090a0b,0x0c0d0e0f,0x00010203 # rotl 4 bytes
- .long 0x08090a0b,0x0c0d0e0f,0x00010203,0x04050607 # rotl 8 bytes
- .long 0x0c0d0e0f,0x00010203,0x04050607,0x08090a0b # rotl 12 bytes
.long 0x03020100,0x07060504,0x0b0a0908,0x0f0e0d0c # byte swap
+SYM_DATA_END(chacha20_constants)
.text
/*
@@ -43,13 +40,14 @@
* size_t nblocks)
*/
SYM_FUNC_START(__arch_chacha20_blocks_nostack)
- larl %r1,.Lconstants
+ CFI_STARTPROC
+ larl %r1,chacha20_constants
/* COPY0 = "expand 32-byte k" */
VL COPY0,0,,%r1
- /* PERM4-PERM12,BEPERM = byte selectors for VPERM */
- VLM PERM4,BEPERM,16,%r1
+ /* BEPERM = byte selectors for VPERM */
+ ALTERNATIVE __stringify(VL BEPERM,16,,%r1), "brcl 0,0", ALT_FACILITY(148)
/* COPY1,COPY2 = key */
VLM COPY1,COPY2,0,%r3
@@ -89,11 +87,11 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack)
VERLLF STATE1,STATE1,7
/* STATE1[0,1,2,3] = STATE1[1,2,3,0] */
- VPERM STATE1,STATE1,STATE1,PERM4
+ VSLDB STATE1,STATE1,STATE1,4
/* STATE2[0,1,2,3] = STATE2[2,3,0,1] */
- VPERM STATE2,STATE2,STATE2,PERM8
+ VSLDB STATE2,STATE2,STATE2,8
/* STATE3[0,1,2,3] = STATE3[3,0,1,2] */
- VPERM STATE3,STATE3,STATE3,PERM12
+ VSLDB STATE3,STATE3,STATE3,12
/* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 16) */
VAF STATE0,STATE0,STATE1
@@ -116,32 +114,38 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack)
VERLLF STATE1,STATE1,7
/* STATE1[0,1,2,3] = STATE1[3,0,1,2] */
- VPERM STATE1,STATE1,STATE1,PERM12
+ VSLDB STATE1,STATE1,STATE1,12
/* STATE2[0,1,2,3] = STATE2[2,3,0,1] */
- VPERM STATE2,STATE2,STATE2,PERM8
+ VSLDB STATE2,STATE2,STATE2,8
/* STATE3[0,1,2,3] = STATE3[1,2,3,0] */
- VPERM STATE3,STATE3,STATE3,PERM4
+ VSLDB STATE3,STATE3,STATE3,4
brctg %r0,.Ldoubleround
- /* OUTPUT0 = STATE0 + STATE0 */
+ /* OUTPUT0 = STATE0 + COPY0 */
VAF STATE0,STATE0,COPY0
- /* OUTPUT1 = STATE1 + STATE1 */
+ /* OUTPUT1 = STATE1 + COPY1 */
VAF STATE1,STATE1,COPY1
- /* OUTPUT2 = STATE2 + STATE2 */
+ /* OUTPUT2 = STATE2 + COPY2 */
VAF STATE2,STATE2,COPY2
- /* OUTPUT2 = STATE3 + STATE3 */
+ /* OUTPUT3 = STATE3 + COPY3 */
VAF STATE3,STATE3,COPY3
- /*
- * 32 bit wise little endian store to OUTPUT. If the vector
- * enhancement facility 2 is not installed use the slow path.
- */
- ALTERNATIVE "brc 0xf,.Lstoreslow", "nop", ALT_FACILITY(148)
- VSTBRF STATE0,0,,%r2
- VSTBRF STATE1,16,,%r2
- VSTBRF STATE2,32,,%r2
- VSTBRF STATE3,48,,%r2
-.Lstoredone:
+ ALTERNATIVE \
+ __stringify( \
+ /* Convert STATE to little endian and store to OUTPUT */\
+ VPERM TMP0,STATE0,STATE0,BEPERM; \
+ VPERM TMP1,STATE1,STATE1,BEPERM; \
+ VPERM TMP2,STATE2,STATE2,BEPERM; \
+ VPERM TMP3,STATE3,STATE3,BEPERM; \
+ VSTM TMP0,TMP3,0,%r2), \
+ __stringify( \
+ /* 32 bit wise little endian store to OUTPUT */ \
+ VSTBRF STATE0,0,,%r2; \
+ VSTBRF STATE1,16,,%r2; \
+ VSTBRF STATE2,32,,%r2; \
+ VSTBRF STATE3,48,,%r2; \
+ brcl 0,0), \
+ ALT_FACILITY(148)
/* ++COPY3.COUNTER */
/* alsih %r3,1 */
@@ -173,13 +177,5 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack)
VZERO TMP3
br %r14
-
-.Lstoreslow:
- /* Convert STATE to little endian format and store to OUTPUT */
- VPERM TMP0,STATE0,STATE0,BEPERM
- VPERM TMP1,STATE1,STATE1,BEPERM
- VPERM TMP2,STATE2,STATE2,BEPERM
- VPERM TMP3,STATE3,STATE3,BEPERM
- VSTM TMP0,TMP3,0,%r2
- j .Lstoredone
+ CFI_ENDPROC
SYM_FUNC_END(__arch_chacha20_blocks_nostack)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 0fd96860fc45..bb7134faaebf 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -348,20 +348,29 @@ static inline int plo_test_bit(unsigned char nr)
return cc == 0;
}
-static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
+static __always_inline void __sortl_query(u8 (*query)[32])
{
asm volatile(
" lghi 0,0\n"
- " lgr 1,%[query]\n"
+ " la 1,%[query]\n"
/* Parameter registers are ignored */
- " .insn rrf,%[opc] << 16,2,4,6,0\n"
+ " .insn rre,0xb9380000,2,4\n"
+ : [query] "=R" (*query)
:
- : [query] "d" ((unsigned long)query), [opc] "i" (opcode)
- : "cc", "memory", "0", "1");
+ : "cc", "0", "1");
}
-#define INSN_SORTL 0xb938
-#define INSN_DFLTCC 0xb939
+static __always_inline void __dfltcc_query(u8 (*query)[32])
+{
+ asm volatile(
+ " lghi 0,0\n"
+ " la 1,%[query]\n"
+ /* Parameter registers are ignored */
+ " .insn rrf,0xb9390000,2,4,6,0\n"
+ : [query] "=R" (*query)
+ :
+ : "cc", "0", "1");
+}
static void __init kvm_s390_cpu_feat_init(void)
{
@@ -415,10 +424,10 @@ static void __init kvm_s390_cpu_feat_init(void)
kvm_s390_available_subfunc.kdsa);
if (test_facility(150)) /* SORTL */
- __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
+ __sortl_query(&kvm_s390_available_subfunc.sortl);
if (test_facility(151)) /* DFLTCC */
- __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
+ __dfltcc_query(&kvm_s390_available_subfunc.dfltcc);
if (MACHINE_HAS_ESOP)
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index ee90a91ed888..6f55a59a0871 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -657,7 +657,6 @@ static const struct file_operations clp_misc_fops = {
.release = clp_misc_release,
.unlocked_ioctl = clp_misc_ioctl,
.compat_ioctl = clp_misc_ioctl,
- .llseek = no_llseek,
};
static struct miscdevice clp_misc_device = {
diff --git a/arch/sh/include/asm/irq.h b/arch/sh/include/asm/irq.h
index 0f384b1f45ca..53fc18a3d4c2 100644
--- a/arch/sh/include/asm/irq.h
+++ b/arch/sh/include/asm/irq.h
@@ -14,12 +14,6 @@
#define NO_IRQ_IGNORE ((unsigned int)-1)
/*
- * Simple Mask Register Support
- */
-extern void make_maskreg_irq(unsigned int irq);
-extern unsigned short *irq_mask_register;
-
-/*
* PINT IRQs
*/
void make_imask_irq(unsigned int irq);
diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index dca84fd6d00a..c89575d05021 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -11,7 +11,6 @@ config UML
select ARCH_HAS_KCOV
select ARCH_HAS_STRNCPY_FROM_USER
select ARCH_HAS_STRNLEN_USER
- select ARCH_NO_PREEMPT_DYNAMIC
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_KASAN if X86_64
select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN
diff --git a/arch/um/drivers/harddog_kern.c b/arch/um/drivers/harddog_kern.c
index 99a7144b229f..819aabb4ecdc 100644
--- a/arch/um/drivers/harddog_kern.c
+++ b/arch/um/drivers/harddog_kern.c
@@ -164,7 +164,6 @@ static const struct file_operations harddog_fops = {
.compat_ioctl = compat_ptr_ioctl,
.open = harddog_open,
.release = harddog_release,
- .llseek = no_llseek,
};
static struct miscdevice harddog_miscdev = {
diff --git a/arch/um/drivers/hostaudio_kern.c b/arch/um/drivers/hostaudio_kern.c
index c42b793bce65..9d228878cea2 100644
--- a/arch/um/drivers/hostaudio_kern.c
+++ b/arch/um/drivers/hostaudio_kern.c
@@ -291,7 +291,6 @@ static int hostmixer_release(struct inode *inode, struct file *file)
static const struct file_operations hostaudio_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.read = hostaudio_read,
.write = hostaudio_write,
.poll = hostaudio_poll,
@@ -304,7 +303,6 @@ static const struct file_operations hostaudio_fops = {
static const struct file_operations hostmixer_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.unlocked_ioctl = hostmixer_ioctl_mixdev,
.open = hostmixer_open_mixdev,
.release = hostmixer_release,
diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c
index 2d473282ab51..c992da83268d 100644
--- a/arch/um/drivers/vector_kern.c
+++ b/arch/um/drivers/vector_kern.c
@@ -22,6 +22,7 @@
#include <linux/interrupt.h>
#include <linux/firmware.h>
#include <linux/fs.h>
+#include <asm/atomic.h>
#include <uapi/linux/filter.h>
#include <init.h>
#include <irq_kern.h>
@@ -102,18 +103,33 @@ static const struct {
static void vector_reset_stats(struct vector_private *vp)
{
+ /* We reuse the existing queue locks for stats */
+
+ /* RX stats are modified with RX head_lock held
+ * in vector_poll.
+ */
+
+ spin_lock(&vp->rx_queue->head_lock);
vp->estats.rx_queue_max = 0;
vp->estats.rx_queue_running_average = 0;
- vp->estats.tx_queue_max = 0;
- vp->estats.tx_queue_running_average = 0;
vp->estats.rx_encaps_errors = 0;
+ vp->estats.sg_ok = 0;
+ vp->estats.sg_linearized = 0;
+ spin_unlock(&vp->rx_queue->head_lock);
+
+ /* TX stats are modified with TX head_lock held
+ * in vector_send.
+ */
+
+ spin_lock(&vp->tx_queue->head_lock);
vp->estats.tx_timeout_count = 0;
vp->estats.tx_restart_queue = 0;
vp->estats.tx_kicks = 0;
vp->estats.tx_flow_control_xon = 0;
vp->estats.tx_flow_control_xoff = 0;
- vp->estats.sg_ok = 0;
- vp->estats.sg_linearized = 0;
+ vp->estats.tx_queue_max = 0;
+ vp->estats.tx_queue_running_average = 0;
+ spin_unlock(&vp->tx_queue->head_lock);
}
static int get_mtu(struct arglist *def)
@@ -232,12 +248,6 @@ static int get_transport_options(struct arglist *def)
static char *drop_buffer;
-/* Array backed queues optimized for bulk enqueue/dequeue and
- * 1:N (small values of N) or 1:1 enqueuer/dequeuer ratios.
- * For more details and full design rationale see
- * http://foswiki.cambridgegreys.com/Main/EatYourTailAndEnjoyIt
- */
-
/*
* Advance the mmsg queue head by n = advance. Resets the queue to
@@ -247,27 +257,13 @@ static char *drop_buffer;
static int vector_advancehead(struct vector_queue *qi, int advance)
{
- int queue_depth;
-
qi->head =
(qi->head + advance)
% qi->max_depth;
- spin_lock(&qi->tail_lock);
- qi->queue_depth -= advance;
-
- /* we are at 0, use this to
- * reset head and tail so we can use max size vectors
- */
-
- if (qi->queue_depth == 0) {
- qi->head = 0;
- qi->tail = 0;
- }
- queue_depth = qi->queue_depth;
- spin_unlock(&qi->tail_lock);
- return queue_depth;
+ atomic_sub(advance, &qi->queue_depth);
+ return atomic_read(&qi->queue_depth);
}
/* Advance the queue tail by n = advance.
@@ -277,16 +273,11 @@ static int vector_advancehead(struct vector_queue *qi, int advance)
static int vector_advancetail(struct vector_queue *qi, int advance)
{
- int queue_depth;
-
qi->tail =
(qi->tail + advance)
% qi->max_depth;
- spin_lock(&qi->head_lock);
- qi->queue_depth += advance;
- queue_depth = qi->queue_depth;
- spin_unlock(&qi->head_lock);
- return queue_depth;
+ atomic_add(advance, &qi->queue_depth);
+ return atomic_read(&qi->queue_depth);
}
static int prep_msg(struct vector_private *vp,
@@ -339,9 +330,7 @@ static int vector_enqueue(struct vector_queue *qi, struct sk_buff *skb)
int iov_count;
spin_lock(&qi->tail_lock);
- spin_lock(&qi->head_lock);
- queue_depth = qi->queue_depth;
- spin_unlock(&qi->head_lock);
+ queue_depth = atomic_read(&qi->queue_depth);
if (skb)
packet_len = skb->len;
@@ -360,6 +349,7 @@ static int vector_enqueue(struct vector_queue *qi, struct sk_buff *skb)
mmsg_vector->msg_hdr.msg_iovlen = iov_count;
mmsg_vector->msg_hdr.msg_name = vp->fds->remote_addr;
mmsg_vector->msg_hdr.msg_namelen = vp->fds->remote_addr_size;
+ wmb(); /* Make the packet visible to the NAPI poll thread */
queue_depth = vector_advancetail(qi, 1);
} else
goto drop;
@@ -398,7 +388,7 @@ static int consume_vector_skbs(struct vector_queue *qi, int count)
}
/*
- * Generic vector deque via sendmmsg with support for forming headers
+ * Generic vector dequeue via sendmmsg with support for forming headers
* using transport specific callback. Allows GRE, L2TPv3, RAW and
* other transports to use a common dequeue procedure in vector mode
*/
@@ -408,69 +398,64 @@ static int vector_send(struct vector_queue *qi)
{
struct vector_private *vp = netdev_priv(qi->dev);
struct mmsghdr *send_from;
- int result = 0, send_len, queue_depth = qi->max_depth;
+ int result = 0, send_len;
if (spin_trylock(&qi->head_lock)) {
- if (spin_trylock(&qi->tail_lock)) {
- /* update queue_depth to current value */
- queue_depth = qi->queue_depth;
- spin_unlock(&qi->tail_lock);
- while (queue_depth > 0) {
- /* Calculate the start of the vector */
- send_len = queue_depth;
- send_from = qi->mmsg_vector;
- send_from += qi->head;
- /* Adjust vector size if wraparound */
- if (send_len + qi->head > qi->max_depth)
- send_len = qi->max_depth - qi->head;
- /* Try to TX as many packets as possible */
- if (send_len > 0) {
- result = uml_vector_sendmmsg(
- vp->fds->tx_fd,
- send_from,
- send_len,
- 0
- );
- vp->in_write_poll =
- (result != send_len);
- }
- /* For some of the sendmmsg error scenarios
- * we may end being unsure in the TX success
- * for all packets. It is safer to declare
- * them all TX-ed and blame the network.
- */
- if (result < 0) {
- if (net_ratelimit())
- netdev_err(vp->dev, "sendmmsg err=%i\n",
- result);
- vp->in_error = true;
- result = send_len;
- }
- if (result > 0) {
- queue_depth =
- consume_vector_skbs(qi, result);
- /* This is equivalent to an TX IRQ.
- * Restart the upper layers to feed us
- * more packets.
- */
- if (result > vp->estats.tx_queue_max)
- vp->estats.tx_queue_max = result;
- vp->estats.tx_queue_running_average =
- (vp->estats.tx_queue_running_average + result) >> 1;
- }
- netif_wake_queue(qi->dev);
- /* if TX is busy, break out of the send loop,
- * poll write IRQ will reschedule xmit for us
+ /* update queue_depth to current value */
+ while (atomic_read(&qi->queue_depth) > 0) {
+ /* Calculate the start of the vector */
+ send_len = atomic_read(&qi->queue_depth);
+ send_from = qi->mmsg_vector;
+ send_from += qi->head;
+ /* Adjust vector size if wraparound */
+ if (send_len + qi->head > qi->max_depth)
+ send_len = qi->max_depth - qi->head;
+ /* Try to TX as many packets as possible */
+ if (send_len > 0) {
+ result = uml_vector_sendmmsg(
+ vp->fds->tx_fd,
+ send_from,
+ send_len,
+ 0
+ );
+ vp->in_write_poll =
+ (result != send_len);
+ }
+ /* For some of the sendmmsg error scenarios
+ * we may end being unsure in the TX success
+ * for all packets. It is safer to declare
+ * them all TX-ed and blame the network.
+ */
+ if (result < 0) {
+ if (net_ratelimit())
+ netdev_err(vp->dev, "sendmmsg err=%i\n",
+ result);
+ vp->in_error = true;
+ result = send_len;
+ }
+ if (result > 0) {
+ consume_vector_skbs(qi, result);
+ /* This is equivalent to an TX IRQ.
+ * Restart the upper layers to feed us
+ * more packets.
*/
- if (result != send_len) {
- vp->estats.tx_restart_queue++;
- break;
- }
+ if (result > vp->estats.tx_queue_max)
+ vp->estats.tx_queue_max = result;
+ vp->estats.tx_queue_running_average =
+ (vp->estats.tx_queue_running_average + result) >> 1;
+ }
+ netif_wake_queue(qi->dev);
+ /* if TX is busy, break out of the send loop,
+ * poll write IRQ will reschedule xmit for us.
+ */
+ if (result != send_len) {
+ vp->estats.tx_restart_queue++;
+ break;
}
}
spin_unlock(&qi->head_lock);
}
- return queue_depth;
+ return atomic_read(&qi->queue_depth);
}
/* Queue destructor. Deliberately stateless so we can use
@@ -589,7 +574,7 @@ static struct vector_queue *create_queue(
}
spin_lock_init(&result->head_lock);
spin_lock_init(&result->tail_lock);
- result->queue_depth = 0;
+ atomic_set(&result->queue_depth, 0);
result->head = 0;
result->tail = 0;
return result;
@@ -668,18 +653,27 @@ done:
}
-/* Prepare queue for recvmmsg one-shot rx - fill with fresh sk_buffs*/
+/* Prepare queue for recvmmsg one-shot rx - fill with fresh sk_buffs */
static void prep_queue_for_rx(struct vector_queue *qi)
{
struct vector_private *vp = netdev_priv(qi->dev);
struct mmsghdr *mmsg_vector = qi->mmsg_vector;
void **skbuff_vector = qi->skbuff_vector;
- int i;
+ int i, queue_depth;
+
+ queue_depth = atomic_read(&qi->queue_depth);
- if (qi->queue_depth == 0)
+ if (queue_depth == 0)
return;
- for (i = 0; i < qi->queue_depth; i++) {
+
+ /* RX is always emptied 100% during each cycle, so we do not
+ * have to do the tail wraparound math for it.
+ */
+
+ qi->head = qi->tail = 0;
+
+ for (i = 0; i < queue_depth; i++) {
/* it is OK if allocation fails - recvmmsg with NULL data in
* iov argument still performs an RX, just drops the packet
* This allows us stop faffing around with a "drop buffer"
@@ -689,7 +683,7 @@ static void prep_queue_for_rx(struct vector_queue *qi)
skbuff_vector++;
mmsg_vector++;
}
- qi->queue_depth = 0;
+ atomic_set(&qi->queue_depth, 0);
}
static struct vector_device *find_device(int n)
@@ -972,7 +966,7 @@ static int vector_mmsg_rx(struct vector_private *vp, int budget)
budget = qi->max_depth;
packet_count = uml_vector_recvmmsg(
- vp->fds->rx_fd, qi->mmsg_vector, qi->max_depth, 0);
+ vp->fds->rx_fd, qi->mmsg_vector, budget, 0);
if (packet_count < 0)
vp->in_error = true;
@@ -985,7 +979,7 @@ static int vector_mmsg_rx(struct vector_private *vp, int budget)
* many do we need to prep the next time prep_queue_for_rx() is called.
*/
- qi->queue_depth = packet_count;
+ atomic_add(packet_count, &qi->queue_depth);
for (i = 0; i < packet_count; i++) {
skb = (*skbuff_vector);
@@ -1172,6 +1166,7 @@ static int vector_poll(struct napi_struct *napi, int budget)
if ((vp->options & VECTOR_TX) != 0)
tx_enqueued = (vector_send(vp->tx_queue) > 0);
+ spin_lock(&vp->rx_queue->head_lock);
if ((vp->options & VECTOR_RX) > 0)
err = vector_mmsg_rx(vp, budget);
else {
@@ -1179,12 +1174,13 @@ static int vector_poll(struct napi_struct *napi, int budget)
if (err > 0)
err = 1;
}
+ spin_unlock(&vp->rx_queue->head_lock);
if (err > 0)
work_done += err;
if (tx_enqueued || err > 0)
napi_schedule(napi);
- if (work_done < budget)
+ if (work_done <= budget)
napi_complete_done(napi, work_done);
return work_done;
}
@@ -1225,7 +1221,7 @@ static int vector_net_open(struct net_device *dev)
vp->rx_header_size,
MAX_IOV_SIZE
);
- vp->rx_queue->queue_depth = get_depth(vp->parsed);
+ atomic_set(&vp->rx_queue->queue_depth, get_depth(vp->parsed));
} else {
vp->header_rxbuffer = kmalloc(
vp->rx_header_size,
@@ -1467,7 +1463,17 @@ static void vector_get_ethtool_stats(struct net_device *dev,
{
struct vector_private *vp = netdev_priv(dev);
+ /* Stats are modified in the dequeue portions of
+ * rx/tx which are protected by the head locks
+ * grabbing these locks here ensures they are up
+ * to date.
+ */
+
+ spin_lock(&vp->tx_queue->head_lock);
+ spin_lock(&vp->rx_queue->head_lock);
memcpy(tmp_stats, &vp->estats, sizeof(struct vector_estats));
+ spin_unlock(&vp->rx_queue->head_lock);
+ spin_unlock(&vp->tx_queue->head_lock);
}
static int vector_get_coalesce(struct net_device *netdev,
diff --git a/arch/um/drivers/vector_kern.h b/arch/um/drivers/vector_kern.h
index 806df551be0b..417834793658 100644
--- a/arch/um/drivers/vector_kern.h
+++ b/arch/um/drivers/vector_kern.h
@@ -14,6 +14,7 @@
#include <linux/ctype.h>
#include <linux/workqueue.h>
#include <linux/interrupt.h>
+#include <asm/atomic.h>
#include "vector_user.h"
@@ -44,7 +45,8 @@ struct vector_queue {
struct net_device *dev;
spinlock_t head_lock;
spinlock_t tail_lock;
- int queue_depth, head, tail, max_depth, max_iov_frags;
+ atomic_t queue_depth;
+ int head, tail, max_depth, max_iov_frags;
short options;
};
diff --git a/arch/um/drivers/vector_user.c b/arch/um/drivers/vector_user.c
index b16a5e5619d3..2ea67e6fd067 100644
--- a/arch/um/drivers/vector_user.c
+++ b/arch/um/drivers/vector_user.c
@@ -46,6 +46,9 @@
#define TRANS_FD "fd"
#define TRANS_FD_LEN strlen(TRANS_FD)
+#define TRANS_VDE "vde"
+#define TRANS_VDE_LEN strlen(TRANS_VDE)
+
#define VNET_HDR_FAIL "could not enable vnet headers on fd %d"
#define TUN_GET_F_FAIL "tapraw: TUNGETFEATURES failed: %s"
#define L2TPV3_BIND_FAIL "l2tpv3_open : could not bind socket err=%i"
@@ -434,6 +437,84 @@ fd_cleanup:
return NULL;
}
+/* enough char to store an int type */
+#define ENOUGH(type) ((CHAR_BIT * sizeof(type) - 1) / 3 + 2)
+#define ENOUGH_OCTAL(type) ((CHAR_BIT * sizeof(type) + 2) / 3)
+/* vde_plug --descr xx --port2 xx --mod2 xx --group2 xx seqpacket://NN vnl (NULL) */
+#define VDE_MAX_ARGC 12
+#define VDE_SEQPACKET_HEAD "seqpacket://"
+#define VDE_SEQPACKET_HEAD_LEN (sizeof(VDE_SEQPACKET_HEAD) - 1)
+#define VDE_DEFAULT_DESCRIPTION "UML"
+
+static struct vector_fds *user_init_vde_fds(struct arglist *ifspec)
+{
+ char seqpacketvnl[VDE_SEQPACKET_HEAD_LEN + ENOUGH(int) + 1];
+ char *argv[VDE_MAX_ARGC] = {"vde_plug"};
+ int argc = 1;
+ int rv;
+ int sv[2];
+ struct vector_fds *result = NULL;
+
+ char *vnl = uml_vector_fetch_arg(ifspec,"vnl");
+ char *descr = uml_vector_fetch_arg(ifspec,"descr");
+ char *port = uml_vector_fetch_arg(ifspec,"port");
+ char *mode = uml_vector_fetch_arg(ifspec,"mode");
+ char *group = uml_vector_fetch_arg(ifspec,"group");
+ if (descr == NULL) descr = VDE_DEFAULT_DESCRIPTION;
+
+ argv[argc++] = "--descr";
+ argv[argc++] = descr;
+ if (port != NULL) {
+ argv[argc++] = "--port2";
+ argv[argc++] = port;
+ }
+ if (mode != NULL) {
+ argv[argc++] = "--mod2";
+ argv[argc++] = mode;
+ }
+ if (group != NULL) {
+ argv[argc++] = "--group2";
+ argv[argc++] = group;
+ }
+ argv[argc++] = seqpacketvnl;
+ argv[argc++] = vnl;
+ argv[argc++] = NULL;
+
+ rv = socketpair(AF_UNIX, SOCK_SEQPACKET, 0, sv);
+ if (rv < 0) {
+ printk(UM_KERN_ERR "vde: seqpacket socketpair err %d", -errno);
+ return NULL;
+ }
+ rv = os_set_exec_close(sv[0]);
+ if (rv < 0) {
+ printk(UM_KERN_ERR "vde: seqpacket socketpair cloexec err %d", -errno);
+ goto vde_cleanup_sv;
+ }
+ snprintf(seqpacketvnl, sizeof(seqpacketvnl), VDE_SEQPACKET_HEAD "%d", sv[1]);
+
+ run_helper(NULL, NULL, argv);
+
+ close(sv[1]);
+
+ result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL);
+ if (result == NULL) {
+ printk(UM_KERN_ERR "fd open: allocation failed");
+ goto vde_cleanup;
+ }
+
+ result->rx_fd = sv[0];
+ result->tx_fd = sv[0];
+ result->remote_addr_size = 0;
+ result->remote_addr = NULL;
+ return result;
+
+vde_cleanup_sv:
+ close(sv[1]);
+vde_cleanup:
+ close(sv[0]);
+ return NULL;
+}
+
static struct vector_fds *user_init_raw_fds(struct arglist *ifspec)
{
int rxfd = -1, txfd = -1;
@@ -673,6 +754,8 @@ struct vector_fds *uml_vector_user_open(
return user_init_unix_fds(parsed, ID_BESS);
if (strncmp(transport, TRANS_FD, TRANS_FD_LEN) == 0)
return user_init_fd_fds(parsed);
+ if (strncmp(transport, TRANS_VDE, TRANS_VDE_LEN) == 0)
+ return user_init_vde_fds(parsed);
return NULL;
}
diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
index 5bb397b65efb..83373c9963e7 100644
--- a/arch/um/include/asm/pgtable.h
+++ b/arch/um/include/asm/pgtable.h
@@ -359,11 +359,4 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte)
return pte;
}
-/* Clear a kernel PTE and flush it from the TLB */
-#define kpte_clear_flush(ptep, vaddr) \
-do { \
- pte_clear(&init_mm, (vaddr), (ptep)); \
- __flush_tlb_one((vaddr)); \
-} while (0)
-
#endif
diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
index 5a7c05275aa7..bce4595798da 100644
--- a/arch/um/include/asm/processor-generic.h
+++ b/arch/um/include/asm/processor-generic.h
@@ -28,20 +28,10 @@ struct thread_struct {
struct arch_thread arch;
jmp_buf switch_buf;
struct {
- int op;
- union {
- struct {
- int pid;
- } fork, exec;
- struct {
- int (*proc)(void *);
- void *arg;
- } thread;
- struct {
- void (*proc)(void *);
- void *arg;
- } cb;
- } u;
+ struct {
+ int (*proc)(void *);
+ void *arg;
+ } thread;
} request;
};
@@ -51,7 +41,7 @@ struct thread_struct {
.fault_addr = NULL, \
.prev_sched = NULL, \
.arch = INIT_ARCH_THREAD, \
- .request = { 0 } \
+ .request = { } \
}
/*
diff --git a/arch/um/include/asm/sysrq.h b/arch/um/include/asm/sysrq.h
deleted file mode 100644
index 8fc8c65cd357..000000000000
--- a/arch/um/include/asm/sysrq.h
+++ /dev/null
@@ -1,8 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __UM_SYSRQ_H
-#define __UM_SYSRQ_H
-
-struct task_struct;
-extern void show_trace(struct task_struct* task, unsigned long *stack);
-
-#endif
diff --git a/arch/um/include/shared/skas/mm_id.h b/arch/um/include/shared/skas/mm_id.h
index 1e76ba40feba..140388c282f6 100644
--- a/arch/um/include/shared/skas/mm_id.h
+++ b/arch/um/include/shared/skas/mm_id.h
@@ -7,10 +7,7 @@
#define __MM_ID_H
struct mm_id {
- union {
- int mm_fd;
- int pid;
- } u;
+ int pid;
unsigned long stack;
int syscall_data_len;
};
diff --git a/arch/um/include/shared/skas/skas.h b/arch/um/include/shared/skas/skas.h
index ebaa116de30b..85c50122ab98 100644
--- a/arch/um/include/shared/skas/skas.h
+++ b/arch/um/include/shared/skas/skas.h
@@ -10,10 +10,8 @@
extern int userspace_pid[];
-extern int user_thread(unsigned long stack, int flags);
extern void new_thread_handler(void);
extern void handle_syscall(struct uml_pt_regs *regs);
-extern long execute_syscall_skas(void *r);
extern unsigned long current_stub_stack(void);
extern struct mm_id *current_mm_id(void);
extern void current_mm_sync(void);
diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c
index 2c15bb2c104c..cb8b5cd9285c 100644
--- a/arch/um/kernel/exec.c
+++ b/arch/um/kernel/exec.c
@@ -35,8 +35,5 @@ void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp)
PT_REGS_IP(regs) = eip;
PT_REGS_SP(regs) = esp;
clear_thread_flag(TIF_SINGLESTEP);
-#ifdef SUBARCH_EXECVE1
- SUBARCH_EXECVE1(regs->regs);
-#endif
}
EXPORT_SYMBOL(start_thread);
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index f36b63f53bab..be2856af6d4c 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -109,8 +109,8 @@ void new_thread_handler(void)
schedule_tail(current->thread.prev_sched);
current->thread.prev_sched = NULL;
- fn = current->thread.request.u.thread.proc;
- arg = current->thread.request.u.thread.arg;
+ fn = current->thread.request.thread.proc;
+ arg = current->thread.request.thread.arg;
/*
* callback returns only if the kernel thread execs a process
@@ -158,8 +158,8 @@ int copy_thread(struct task_struct * p, const struct kernel_clone_args *args)
arch_copy_thread(&current->thread.arch, &p->thread.arch);
} else {
get_safe_registers(p->thread.regs.regs.gp, p->thread.regs.regs.fp);
- p->thread.request.u.thread.proc = args->fn;
- p->thread.request.u.thread.arg = args->fn_arg;
+ p->thread.request.thread.proc = args->fn;
+ p->thread.request.thread.arg = args->fn_arg;
handler = new_thread_handler;
}
diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c
index 3736bca626ba..680bce4bd8fa 100644
--- a/arch/um/kernel/reboot.c
+++ b/arch/um/kernel/reboot.c
@@ -29,7 +29,7 @@ static void kill_off_processes(void)
t = find_lock_task_mm(p);
if (!t)
continue;
- pid = t->mm->context.id.u.pid;
+ pid = t->mm->context.id.pid;
task_unlock(t);
os_kill_ptraced_process(pid, 1);
}
diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
index 47f98d87ea3c..886ed5e65674 100644
--- a/arch/um/kernel/skas/mmu.c
+++ b/arch/um/kernel/skas/mmu.c
@@ -32,11 +32,11 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm)
new_id->stack = stack;
block_signals_trace();
- new_id->u.pid = start_userspace(stack);
+ new_id->pid = start_userspace(stack);
unblock_signals_trace();
- if (new_id->u.pid < 0) {
- ret = new_id->u.pid;
+ if (new_id->pid < 0) {
+ ret = new_id->pid;
goto out_free;
}
@@ -83,12 +83,12 @@ void destroy_context(struct mm_struct *mm)
* whole UML suddenly dying. Also, cover negative and
* 1 cases, since they shouldn't happen either.
*/
- if (mmu->id.u.pid < 2) {
+ if (mmu->id.pid < 2) {
printk(KERN_ERR "corrupt mm_context - pid = %d\n",
- mmu->id.u.pid);
+ mmu->id.pid);
return;
}
- os_kill_ptraced_process(mmu->id.u.pid, 1);
+ os_kill_ptraced_process(mmu->id.pid, 1);
free_pages(mmu->id.stack, ilog2(STUB_DATA_PAGES));
}
diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c
index 5f9c1c5f36e2..68657988c8d1 100644
--- a/arch/um/kernel/skas/process.c
+++ b/arch/um/kernel/skas/process.c
@@ -39,8 +39,8 @@ int __init start_uml(void)
init_new_thread_signals();
- init_task.thread.request.u.thread.proc = start_kernel_proc;
- init_task.thread.request.u.thread.arg = NULL;
+ init_task.thread.request.thread.proc = start_kernel_proc;
+ init_task.thread.request.thread.arg = NULL;
return start_idle_thread(task_stack_page(&init_task),
&init_task.thread.switch_buf);
}
diff --git a/arch/um/kernel/skas/syscall.c b/arch/um/kernel/skas/syscall.c
index 9ee19e566da3..b09e85279d2b 100644
--- a/arch/um/kernel/skas/syscall.c
+++ b/arch/um/kernel/skas/syscall.c
@@ -12,23 +12,13 @@
#include <sysdep/syscalls.h>
#include <linux/time-internal.h>
#include <asm/unistd.h>
+#include <asm/delay.h>
void handle_syscall(struct uml_pt_regs *r)
{
struct pt_regs *regs = container_of(r, struct pt_regs, regs);
int syscall;
- /*
- * If we have infinite CPU resources, then make every syscall also a
- * preemption point, since we don't have any other preemption in this
- * case, and kernel threads would basically never run until userspace
- * went to sleep, even if said userspace interacts with the kernel in
- * various ways.
- */
- if (time_travel_mode == TT_MODE_INFCPU ||
- time_travel_mode == TT_MODE_EXTERNAL)
- schedule();
-
/* Initialize the syscall number and default return value. */
UPT_SYSCALL_NR(r) = PT_SYSCALL_NR(r->gp);
PT_REGS_SET_SYSCALL_RETURN(regs, -ENOSYS);
@@ -41,9 +31,25 @@ void handle_syscall(struct uml_pt_regs *r)
goto out;
syscall = UPT_SYSCALL_NR(r);
- if (syscall >= 0 && syscall < __NR_syscalls)
- PT_REGS_SET_SYSCALL_RETURN(regs,
- EXECUTE_SYSCALL(syscall, regs));
+ if (syscall >= 0 && syscall < __NR_syscalls) {
+ unsigned long ret = EXECUTE_SYSCALL(syscall, regs);
+
+ PT_REGS_SET_SYSCALL_RETURN(regs, ret);
+
+ /*
+ * An error value here can be some form of -ERESTARTSYS
+ * and then we'd just loop. Make any error syscalls take
+ * some time, so that it won't just loop if something is
+ * not ready, and hopefully other things will make some
+ * progress.
+ */
+ if (IS_ERR_VALUE(ret) &&
+ (time_travel_mode == TT_MODE_INFCPU ||
+ time_travel_mode == TT_MODE_EXTERNAL)) {
+ um_udelay(1);
+ schedule();
+ }
+ }
out:
syscall_trace_leave(regs);
diff --git a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c
index 746715379f12..4bb8622dc512 100644
--- a/arch/um/kernel/sysrq.c
+++ b/arch/um/kernel/sysrq.c
@@ -11,7 +11,6 @@
#include <linux/sched/debug.h>
#include <linux/sched/task_stack.h>
-#include <asm/sysrq.h>
#include <asm/stacktrace.h>
#include <os.h>
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index 47b9f5e63566..29b27b90581f 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -839,7 +839,7 @@ static irqreturn_t um_timer(int irq, void *dev)
if (get_current()->mm != NULL)
{
/* userspace - relay signal, results in correct userspace timers */
- os_alarm_process(get_current()->mm->context.id.u.pid);
+ os_alarm_process(get_current()->mm->context.id.pid);
}
(*timer_clockevent.event_handler)(&timer_clockevent);
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index 44c6fc697f3a..548af31d4111 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -82,16 +82,12 @@ static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
(x ? UM_PROT_EXEC : 0));
if (pte_newpage(*pte)) {
if (pte_present(*pte)) {
- if (pte_newpage(*pte)) {
- __u64 offset;
- unsigned long phys =
- pte_val(*pte) & PAGE_MASK;
- int fd = phys_mapping(phys, &offset);
-
- ret = ops->mmap(ops->mm_idp, addr,
- PAGE_SIZE, prot, fd,
- offset);
- }
+ __u64 offset;
+ unsigned long phys = pte_val(*pte) & PAGE_MASK;
+ int fd = phys_mapping(phys, &offset);
+
+ ret = ops->mmap(ops->mm_idp, addr, PAGE_SIZE,
+ prot, fd, offset);
} else
ret = ops->unmap(ops->mm_idp, addr, PAGE_SIZE);
} else if (pte_newprot(*pte))
diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
index 5adf8f630049..f1d03cf3957f 100644
--- a/arch/um/os-Linux/file.c
+++ b/arch/um/os-Linux/file.c
@@ -528,7 +528,8 @@ int os_shutdown_socket(int fd, int r, int w)
ssize_t os_rcv_fd_msg(int fd, int *fds, unsigned int n_fds,
void *data, size_t data_len)
{
- char buf[CMSG_SPACE(sizeof(*fds) * n_fds)];
+#define MAX_RCV_FDS 2
+ char buf[CMSG_SPACE(sizeof(*fds) * MAX_RCV_FDS)];
struct cmsghdr *cmsg;
struct iovec iov = {
.iov_base = data,
@@ -538,10 +539,13 @@ ssize_t os_rcv_fd_msg(int fd, int *fds, unsigned int n_fds,
.msg_iov = &iov,
.msg_iovlen = 1,
.msg_control = buf,
- .msg_controllen = sizeof(buf),
+ .msg_controllen = CMSG_SPACE(sizeof(*fds) * n_fds),
};
int n;
+ if (n_fds > MAX_RCV_FDS)
+ return -EINVAL;
+
n = recvmsg(fd, &msg, 0);
if (n < 0)
return -errno;
diff --git a/arch/um/os-Linux/skas/mem.c b/arch/um/os-Linux/skas/mem.c
index c55430775efd..9a13ac23c606 100644
--- a/arch/um/os-Linux/skas/mem.c
+++ b/arch/um/os-Linux/skas/mem.c
@@ -78,7 +78,7 @@ static inline long do_syscall_stub(struct mm_id *mm_idp)
{
struct stub_data *proc_data = (void *)mm_idp->stack;
int n, i;
- int err, pid = mm_idp->u.pid;
+ int err, pid = mm_idp->pid;
n = ptrace_setregs(pid, syscall_regs);
if (n < 0) {
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index f7088345b3fc..b6f656bcffb1 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -588,5 +588,5 @@ void reboot_skas(void)
void __switch_mm(struct mm_id *mm_idp)
{
- userspace_pid[0] = mm_idp->u.pid;
+ userspace_pid[0] = mm_idp->pid;
}
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index da8b66dce0da..327c45c5013f 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -16,6 +16,7 @@
#include <asm/insn-eval.h>
#include <asm/pgtable.h>
#include <asm/set_memory.h>
+#include <asm/traps.h>
/* MMIO direction */
#define EPT_READ 0
@@ -433,6 +434,11 @@ static int handle_mmio(struct pt_regs *regs, struct ve_info *ve)
return -EINVAL;
}
+ if (!fault_in_kernel_space(ve->gla)) {
+ WARN_ONCE(1, "Access to userspace address is not supported");
+ return -EINVAL;
+ }
+
/*
* Reject EPT violation #VEs that split pages.
*
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index 8db2ec4d6cda..1f650b4dde50 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -163,20 +163,18 @@ static __always_inline s64 arch_atomic64_dec_return(atomic64_t *v)
}
#define arch_atomic64_dec_return arch_atomic64_dec_return
-static __always_inline s64 arch_atomic64_add(s64 i, atomic64_t *v)
+static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v)
{
__alternative_atomic64(add, add_return,
ASM_OUTPUT2("+A" (i), "+c" (v)),
ASM_NO_INPUT_CLOBBER("memory"));
- return i;
}
-static __always_inline s64 arch_atomic64_sub(s64 i, atomic64_t *v)
+static __always_inline void arch_atomic64_sub(s64 i, atomic64_t *v)
{
__alternative_atomic64(sub, sub_return,
ASM_OUTPUT2("+A" (i), "+c" (v)),
ASM_NO_INPUT_CLOBBER("memory"));
- return i;
}
static __always_inline void arch_atomic64_inc(atomic64_t *v)
diff --git a/arch/x86/include/asm/cpuid.h b/arch/x86/include/asm/cpuid.h
index 80cc6386d7b1..ca4243318aad 100644
--- a/arch/x86/include/asm/cpuid.h
+++ b/arch/x86/include/asm/cpuid.h
@@ -179,6 +179,7 @@ static __always_inline bool cpuid_function_is_indexed(u32 function)
case 0x1d:
case 0x1e:
case 0x1f:
+ case 0x24:
case 0x8000001d:
return true;
}
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 44949f972826..1a42f829667a 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -135,6 +135,8 @@
#define INTEL_LUNARLAKE_M IFM(6, 0xBD)
+#define INTEL_PANTHERLAKE_L IFM(6, 0xCC)
+
/* "Small Core" Processors (Atom/E-Core) */
#define INTEL_ATOM_BONNELL IFM(6, 0x1C) /* Diamondville, Pineview */
@@ -178,4 +180,7 @@
#define INTEL_FAM5_QUARK_X1000 0x09 /* Quark X1000 SoC */
#define INTEL_QUARK_X1000 IFM(5, 0x09) /* Quark X1000 SoC */
+/* Family 19 */
+#define INTEL_PANTHERCOVE_X IFM(19, 0x01) /* Diamond Rapids */
+
#endif /* _ASM_X86_INTEL_FAMILY_H */
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 68ad4f923664..861d080ed4c6 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -14,8 +14,8 @@ BUILD_BUG_ON(1)
* be __static_call_return0.
*/
KVM_X86_OP(check_processor_compatibility)
-KVM_X86_OP(hardware_enable)
-KVM_X86_OP(hardware_disable)
+KVM_X86_OP(enable_virtualization_cpu)
+KVM_X86_OP(disable_virtualization_cpu)
KVM_X86_OP(hardware_unsetup)
KVM_X86_OP(has_emulated_msr)
KVM_X86_OP(vcpu_after_set_cpuid)
@@ -125,7 +125,7 @@ KVM_X86_OP_OPTIONAL(mem_enc_unregister_region)
KVM_X86_OP_OPTIONAL(vm_copy_enc_context_from)
KVM_X86_OP_OPTIONAL(vm_move_enc_context_from)
KVM_X86_OP_OPTIONAL(guest_memory_reclaimed)
-KVM_X86_OP(get_msr_feature)
+KVM_X86_OP(get_feature_msr)
KVM_X86_OP(check_emulate_instruction)
KVM_X86_OP(apic_init_signal_blocked)
KVM_X86_OP_OPTIONAL(enable_l2_tlb_flush)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4a68cb3eba78..6d9f763a7bb9 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -36,6 +36,7 @@
#include <asm/kvm_page_track.h>
#include <asm/kvm_vcpu_regs.h>
#include <asm/hyperv-tlfs.h>
+#include <asm/reboot.h>
#define __KVM_HAVE_ARCH_VCPU_DEBUGFS
@@ -211,6 +212,7 @@ enum exit_fastpath_completion {
EXIT_FASTPATH_NONE,
EXIT_FASTPATH_REENTER_GUEST,
EXIT_FASTPATH_EXIT_HANDLED,
+ EXIT_FASTPATH_EXIT_USERSPACE,
};
typedef enum exit_fastpath_completion fastpath_t;
@@ -280,10 +282,6 @@ enum x86_intercept_stage;
#define PFERR_PRIVATE_ACCESS BIT_ULL(49)
#define PFERR_SYNTHETIC_MASK (PFERR_IMPLICIT_ACCESS | PFERR_PRIVATE_ACCESS)
-#define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK | \
- PFERR_WRITE_MASK | \
- PFERR_PRESENT_MASK)
-
/* apic attention bits */
#define KVM_APIC_CHECK_VAPIC 0
/*
@@ -1629,8 +1627,10 @@ struct kvm_x86_ops {
int (*check_processor_compatibility)(void);
- int (*hardware_enable)(void);
- void (*hardware_disable)(void);
+ int (*enable_virtualization_cpu)(void);
+ void (*disable_virtualization_cpu)(void);
+ cpu_emergency_virt_cb *emergency_disable_virtualization_cpu;
+
void (*hardware_unsetup)(void);
bool (*has_emulated_msr)(struct kvm *kvm, u32 index);
void (*vcpu_after_set_cpuid)(struct kvm_vcpu *vcpu);
@@ -1727,6 +1727,8 @@ struct kvm_x86_ops {
void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
void (*enable_irq_window)(struct kvm_vcpu *vcpu);
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
+
+ const bool x2apic_icr_is_split;
const unsigned long required_apicv_inhibits;
bool allow_apicv_in_x2apic_without_x2apic_virtualization;
void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
@@ -1806,7 +1808,7 @@ struct kvm_x86_ops {
int (*vm_move_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
void (*guest_memory_reclaimed)(struct kvm *kvm);
- int (*get_msr_feature)(struct kvm_msr_entry *entry);
+ int (*get_feature_msr)(u32 msr, u64 *data);
int (*check_emulate_instruction)(struct kvm_vcpu *vcpu, int emul_type,
void *insn, int insn_len);
@@ -2060,6 +2062,8 @@ void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu);
void kvm_enable_efer_bits(u64);
bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
+int kvm_get_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 *data);
+int kvm_set_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 data);
int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, bool host_initiated);
int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data);
int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data);
@@ -2136,7 +2140,15 @@ int kvm_get_nr_pending_nmis(struct kvm_vcpu *vcpu);
void kvm_update_dr7(struct kvm_vcpu *vcpu);
-int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
+bool __kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
+ bool always_retry);
+
+static inline bool kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu,
+ gpa_t cr2_or_gpa)
+{
+ return __kvm_mmu_unprotect_gfn_and_retry(vcpu, cr2_or_gpa, false);
+}
+
void kvm_mmu_free_roots(struct kvm *kvm, struct kvm_mmu *mmu,
ulong roots_to_free);
void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu);
@@ -2254,6 +2266,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
int kvm_cpu_has_extint(struct kvm_vcpu *v);
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
+int kvm_cpu_get_extint(struct kvm_vcpu *v);
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
@@ -2345,7 +2358,8 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages);
KVM_X86_QUIRK_OUT_7E_INC_RIP | \
KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT | \
KVM_X86_QUIRK_FIX_HYPERCALL_INSN | \
- KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS)
+ KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS | \
+ KVM_X86_QUIRK_SLOT_ZAP_ALL)
/*
* KVM previously used a u32 field in kvm_run to indicate the hypercall was
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index a7c06a46fb76..3ae84c3b8e6d 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -36,6 +36,20 @@
#define EFER_FFXSR (1<<_EFER_FFXSR)
#define EFER_AUTOIBRS (1<<_EFER_AUTOIBRS)
+/*
+ * Architectural memory types that are common to MTRRs, PAT, VMX MSRs, etc.
+ * Most MSRs support/allow only a subset of memory types, but the values
+ * themselves are common across all relevant MSRs.
+ */
+#define X86_MEMTYPE_UC 0ull /* Uncacheable, a.k.a. Strong Uncacheable */
+#define X86_MEMTYPE_WC 1ull /* Write Combining */
+/* RESERVED 2 */
+/* RESERVED 3 */
+#define X86_MEMTYPE_WT 4ull /* Write Through */
+#define X86_MEMTYPE_WP 5ull /* Write Protected */
+#define X86_MEMTYPE_WB 6ull /* Write Back */
+#define X86_MEMTYPE_UC_MINUS 7ull /* Weak Uncacheabled (PAT only) */
+
/* FRED MSRs */
#define MSR_IA32_FRED_RSP0 0x1cc /* Level 0 stack pointer */
#define MSR_IA32_FRED_RSP1 0x1cd /* Level 1 stack pointer */
@@ -365,6 +379,12 @@
#define MSR_IA32_CR_PAT 0x00000277
+#define PAT_VALUE(p0, p1, p2, p3, p4, p5, p6, p7) \
+ ((X86_MEMTYPE_ ## p0) | (X86_MEMTYPE_ ## p1 << 8) | \
+ (X86_MEMTYPE_ ## p2 << 16) | (X86_MEMTYPE_ ## p3 << 24) | \
+ (X86_MEMTYPE_ ## p4 << 32) | (X86_MEMTYPE_ ## p5 << 40) | \
+ (X86_MEMTYPE_ ## p6 << 48) | (X86_MEMTYPE_ ## p7 << 56))
+
#define MSR_IA32_DEBUGCTLMSR 0x000001d9
#define MSR_IA32_LASTBRANCHFROMIP 0x000001db
#define MSR_IA32_LASTBRANCHTOIP 0x000001dc
@@ -1159,15 +1179,6 @@
#define MSR_IA32_VMX_VMFUNC 0x00000491
#define MSR_IA32_VMX_PROCBASED_CTLS3 0x00000492
-/* VMX_BASIC bits and bitmasks */
-#define VMX_BASIC_VMCS_SIZE_SHIFT 32
-#define VMX_BASIC_TRUE_CTLS (1ULL << 55)
-#define VMX_BASIC_64 0x0001000000000000LLU
-#define VMX_BASIC_MEM_TYPE_SHIFT 50
-#define VMX_BASIC_MEM_TYPE_MASK 0x003c000000000000LLU
-#define VMX_BASIC_MEM_TYPE_WB 6LLU
-#define VMX_BASIC_INOUT 0x0040000000000000LLU
-
/* Resctrl MSRs: */
/* - Intel: */
#define MSR_IA32_L3_QOS_CFG 0xc81
@@ -1185,11 +1196,6 @@
#define MSR_IA32_SMBA_BW_BASE 0xc0000280
#define MSR_IA32_EVT_CFG_BASE 0xc0000400
-/* MSR_IA32_VMX_MISC bits */
-#define MSR_IA32_VMX_MISC_INTEL_PT (1ULL << 14)
-#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
-#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F
-
/* AMD-V MSRs */
#define MSR_VM_CR 0xc0010114
#define MSR_VM_IGNNE 0xc0010115
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 7e9db77231ac..d1426b64c1b9 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -270,5 +270,26 @@ static inline bool gup_fast_permitted(unsigned long start, unsigned long end)
#include <asm/pgtable-invert.h>
-#endif /* !__ASSEMBLY__ */
+#else /* __ASSEMBLY__ */
+
+#define l4_index(x) (((x) >> 39) & 511)
+#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
+
+L4_PAGE_OFFSET = l4_index(__PAGE_OFFSET_BASE_L4)
+L4_START_KERNEL = l4_index(__START_KERNEL_map)
+
+L3_START_KERNEL = pud_index(__START_KERNEL_map)
+
+#define SYM_DATA_START_PAGE_ALIGNED(name) \
+ SYM_START(name, SYM_L_GLOBAL, .balign PAGE_SIZE)
+
+/* Automate the creation of 1 to 1 mapping pmd entries */
+#define PMDS(START, PERM, COUNT) \
+ i = 0 ; \
+ .rept (COUNT) ; \
+ .quad (START) + (i << PMD_SHIFT) + (PERM) ; \
+ i = i + 1 ; \
+ .endr
+
+#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_PGTABLE_64_H */
diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h
index 6536873f8fc0..d0ef2a678d66 100644
--- a/arch/x86/include/asm/reboot.h
+++ b/arch/x86/include/asm/reboot.h
@@ -25,8 +25,8 @@ void __noreturn machine_real_restart(unsigned int type);
#define MRR_BIOS 0
#define MRR_APM 1
-#if IS_ENABLED(CONFIG_KVM_INTEL) || IS_ENABLED(CONFIG_KVM_AMD)
typedef void (cpu_emergency_virt_cb)(void);
+#if IS_ENABLED(CONFIG_KVM_INTEL) || IS_ENABLED(CONFIG_KVM_AMD)
void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback);
void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback);
void cpu_emergency_disable_virtualization(void);
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index f0dea3750ca9..2b59b9951c90 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -516,6 +516,20 @@ struct ghcb {
u32 ghcb_usage;
} __packed;
+struct vmcb {
+ struct vmcb_control_area control;
+ union {
+ struct vmcb_save_area save;
+
+ /*
+ * For SEV-ES VMs, the save area in the VMCB is used only to
+ * save/load host state. Guest state resides in a separate
+ * page, the aptly named VM Save Area (VMSA), that is encrypted
+ * with the guest's private key.
+ */
+ struct sev_es_save_area host_sev_es_save;
+ };
+} __packed;
#define EXPECTED_VMCB_SAVE_AREA_SIZE 744
#define EXPECTED_GHCB_SAVE_AREA_SIZE 1032
@@ -532,6 +546,7 @@ static inline void __unused_size_checks(void)
BUILD_BUG_ON(sizeof(struct ghcb_save_area) != EXPECTED_GHCB_SAVE_AREA_SIZE);
BUILD_BUG_ON(sizeof(struct sev_es_save_area) != EXPECTED_SEV_ES_SAVE_AREA_SIZE);
BUILD_BUG_ON(sizeof(struct vmcb_control_area) != EXPECTED_VMCB_CONTROL_AREA_SIZE);
+ BUILD_BUG_ON(offsetof(struct vmcb, save) != EXPECTED_VMCB_CONTROL_AREA_SIZE);
BUILD_BUG_ON(sizeof(struct ghcb) != EXPECTED_GHCB_SIZE);
/* Check offsets of reserved fields */
@@ -568,11 +583,6 @@ static inline void __unused_size_checks(void)
BUILD_BUG_RESERVED_OFFSET(ghcb, 0xff0);
}
-struct vmcb {
- struct vmcb_control_area control;
- struct vmcb_save_area save;
-} __packed;
-
#define SVM_CPUID_FUNC 0x8000000a
#define SVM_SELECTOR_S_SHIFT 4
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index d77a31039f24..f7fd4369b821 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -122,19 +122,17 @@
#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff
-#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f
-#define VMX_MISC_SAVE_EFER_LMA 0x00000020
-#define VMX_MISC_ACTIVITY_HLT 0x00000040
-#define VMX_MISC_ACTIVITY_WAIT_SIPI 0x00000100
-#define VMX_MISC_ZERO_LEN_INS 0x40000000
-#define VMX_MISC_MSR_LIST_MULTIPLIER 512
-
/* VMFUNC functions */
#define VMFUNC_CONTROL_BIT(x) BIT((VMX_FEATURE_##x & 0x1f) - 28)
#define VMX_VMFUNC_EPTP_SWITCHING VMFUNC_CONTROL_BIT(EPTP_SWITCHING)
#define VMFUNC_EPTP_ENTRIES 512
+#define VMX_BASIC_32BIT_PHYS_ADDR_ONLY BIT_ULL(48)
+#define VMX_BASIC_DUAL_MONITOR_TREATMENT BIT_ULL(49)
+#define VMX_BASIC_INOUT BIT_ULL(54)
+#define VMX_BASIC_TRUE_CTLS BIT_ULL(55)
+
static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic)
{
return vmx_basic & GENMASK_ULL(30, 0);
@@ -145,9 +143,30 @@ static inline u32 vmx_basic_vmcs_size(u64 vmx_basic)
return (vmx_basic & GENMASK_ULL(44, 32)) >> 32;
}
+static inline u32 vmx_basic_vmcs_mem_type(u64 vmx_basic)
+{
+ return (vmx_basic & GENMASK_ULL(53, 50)) >> 50;
+}
+
+static inline u64 vmx_basic_encode_vmcs_info(u32 revision, u16 size, u8 memtype)
+{
+ return revision | ((u64)size << 32) | ((u64)memtype << 50);
+}
+
+#define VMX_MISC_SAVE_EFER_LMA BIT_ULL(5)
+#define VMX_MISC_ACTIVITY_HLT BIT_ULL(6)
+#define VMX_MISC_ACTIVITY_SHUTDOWN BIT_ULL(7)
+#define VMX_MISC_ACTIVITY_WAIT_SIPI BIT_ULL(8)
+#define VMX_MISC_INTEL_PT BIT_ULL(14)
+#define VMX_MISC_RDMSR_IN_SMM BIT_ULL(15)
+#define VMX_MISC_VMXOFF_BLOCK_SMI BIT_ULL(28)
+#define VMX_MISC_VMWRITE_SHADOW_RO_FIELDS BIT_ULL(29)
+#define VMX_MISC_ZERO_LEN_INS BIT_ULL(30)
+#define VMX_MISC_MSR_LIST_MULTIPLIER 512
+
static inline int vmx_misc_preemption_timer_rate(u64 vmx_misc)
{
- return vmx_misc & VMX_MISC_PREEMPTION_TIMER_RATE_MASK;
+ return vmx_misc & GENMASK_ULL(4, 0);
}
static inline int vmx_misc_cr3_count(u64 vmx_misc)
@@ -508,9 +527,10 @@ enum vmcs_field {
#define VMX_EPTP_PWL_4 0x18ull
#define VMX_EPTP_PWL_5 0x20ull
#define VMX_EPTP_AD_ENABLE_BIT (1ull << 6)
+/* The EPTP memtype is encoded in bits 2:0, i.e. doesn't need to be shifted. */
#define VMX_EPTP_MT_MASK 0x7ull
-#define VMX_EPTP_MT_WB 0x6ull
-#define VMX_EPTP_MT_UC 0x0ull
+#define VMX_EPTP_MT_WB X86_MEMTYPE_WB
+#define VMX_EPTP_MT_UC X86_MEMTYPE_UC
#define VMX_EPT_READABLE_MASK 0x1ull
#define VMX_EPT_WRITABLE_MASK 0x2ull
#define VMX_EPT_EXECUTABLE_MASK 0x4ull
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index bf57a824f722..a8debbf2f702 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -439,6 +439,7 @@ struct kvm_sync_regs {
#define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4)
#define KVM_X86_QUIRK_FIX_HYPERCALL_INSN (1 << 5)
#define KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS (1 << 6)
+#define KVM_X86_QUIRK_SLOT_ZAP_ALL (1 << 7)
#define KVM_STATE_NESTED_FORMAT_VMX 0
#define KVM_STATE_NESTED_FORMAT_SVM 1
diff --git a/arch/x86/kernel/cpu/mce/dev-mcelog.c b/arch/x86/kernel/cpu/mce/dev-mcelog.c
index a3aa0199222e..af44fd5dbd7c 100644
--- a/arch/x86/kernel/cpu/mce/dev-mcelog.c
+++ b/arch/x86/kernel/cpu/mce/dev-mcelog.c
@@ -331,7 +331,6 @@ static const struct file_operations mce_chrdev_ops = {
.poll = mce_chrdev_poll,
.unlocked_ioctl = mce_chrdev_ioctl,
.compat_ioctl = compat_ptr_ioctl,
- .llseek = no_llseek,
};
static struct miscdevice mce_chrdev_device = {
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c
index 2a2fc14955cd..989d368be04f 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.c
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.c
@@ -55,6 +55,12 @@
#include "mtrr.h"
+static_assert(X86_MEMTYPE_UC == MTRR_TYPE_UNCACHABLE);
+static_assert(X86_MEMTYPE_WC == MTRR_TYPE_WRCOMB);
+static_assert(X86_MEMTYPE_WT == MTRR_TYPE_WRTHROUGH);
+static_assert(X86_MEMTYPE_WP == MTRR_TYPE_WRPROT);
+static_assert(X86_MEMTYPE_WB == MTRR_TYPE_WRBACK);
+
/* arch_phys_wc_add returns an MTRR register index plus this offset. */
#define MTRR_TO_PHYS_WC_OFFSET 1000
diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
index e69489d48625..972e6b6b0481 100644
--- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
+++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
@@ -1567,7 +1567,6 @@ static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma)
static const struct file_operations pseudo_lock_dev_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.read = NULL,
.write = NULL,
.open = pseudo_lock_dev_open,
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 330922b328bf..16752b8dfa89 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -32,13 +32,6 @@
* We are not able to switch in one step to the final KERNEL ADDRESS SPACE
* because we need identity-mapped pages.
*/
-#define l4_index(x) (((x) >> 39) & 511)
-#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
-
-L4_PAGE_OFFSET = l4_index(__PAGE_OFFSET_BASE_L4)
-L4_START_KERNEL = l4_index(__START_KERNEL_map)
-
-L3_START_KERNEL = pud_index(__START_KERNEL_map)
__HEAD
.code64
@@ -577,9 +570,6 @@ SYM_CODE_START_NOALIGN(vc_no_ghcb)
SYM_CODE_END(vc_no_ghcb)
#endif
-#define SYM_DATA_START_PAGE_ALIGNED(name) \
- SYM_START(name, SYM_L_GLOBAL, .balign PAGE_SIZE)
-
#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
/*
* Each PGD needs to be 8k long and 8k aligned. We do not
@@ -601,14 +591,6 @@ SYM_CODE_END(vc_no_ghcb)
#define PTI_USER_PGD_FILL 0
#endif
-/* Automate the creation of 1 to 1 mapping pmd entries */
-#define PMDS(START, PERM, COUNT) \
- i = 0 ; \
- .rept (COUNT) ; \
- .quad (START) + (i << PMD_SHIFT) + (PERM) ; \
- i = i + 1 ; \
- .endr
-
__INITDATA
.balign 4
@@ -708,8 +690,6 @@ SYM_DATA_START_PAGE_ALIGNED(level1_fixmap_pgt)
.endr
SYM_DATA_END(level1_fixmap_pgt)
-#undef PMDS
-
.data
.align 16
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 2617be544480..41786b834b16 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -705,7 +705,7 @@ void kvm_set_cpu_caps(void)
kvm_cpu_cap_init_kvm_defined(CPUID_7_1_EDX,
F(AVX_VNNI_INT8) | F(AVX_NE_CONVERT) | F(PREFETCHITI) |
- F(AMX_COMPLEX)
+ F(AMX_COMPLEX) | F(AVX10)
);
kvm_cpu_cap_init_kvm_defined(CPUID_7_2_EDX,
@@ -721,6 +721,10 @@ void kvm_set_cpu_caps(void)
SF(SGX1) | SF(SGX2) | SF(SGX_EDECCSSA)
);
+ kvm_cpu_cap_init_kvm_defined(CPUID_24_0_EBX,
+ F(AVX10_128) | F(AVX10_256) | F(AVX10_512)
+ );
+
kvm_cpu_cap_mask(CPUID_8000_0001_ECX,
F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |
F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
@@ -949,7 +953,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
switch (function) {
case 0:
/* Limited to the highest leaf implemented in KVM. */
- entry->eax = min(entry->eax, 0x1fU);
+ entry->eax = min(entry->eax, 0x24U);
break;
case 1:
cpuid_entry_override(entry, CPUID_1_EDX);
@@ -1174,6 +1178,28 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
break;
}
break;
+ case 0x24: {
+ u8 avx10_version;
+
+ if (!kvm_cpu_cap_has(X86_FEATURE_AVX10)) {
+ entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
+ break;
+ }
+
+ /*
+ * The AVX10 version is encoded in EBX[7:0]. Note, the version
+ * is guaranteed to be >=1 if AVX10 is supported. Note #2, the
+ * version needs to be captured before overriding EBX features!
+ */
+ avx10_version = min_t(u8, entry->ebx & 0xff, 1);
+ cpuid_entry_override(entry, CPUID_24_0_EBX);
+ entry->ebx |= avx10_version;
+
+ entry->eax = 0;
+ entry->ecx = 0;
+ entry->edx = 0;
+ break;
+ }
case KVM_CPUID_SIGNATURE: {
const u32 *sigptr = (const u32 *)KVM_SIGNATURE;
entry->eax = KVM_CPUID_FEATURES;
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 3d7eb11d0e45..63f66c51975a 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -108,7 +108,7 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
* Read pending interrupt(from non-APIC source)
* vector and intack.
*/
-static int kvm_cpu_get_extint(struct kvm_vcpu *v)
+int kvm_cpu_get_extint(struct kvm_vcpu *v)
{
if (!kvm_cpu_has_extint(v)) {
WARN_ON(!lapic_in_kernel(v));
@@ -131,6 +131,7 @@ static int kvm_cpu_get_extint(struct kvm_vcpu *v)
} else
return kvm_pic_read_irq(v->kvm); /* PIC */
}
+EXPORT_SYMBOL_GPL(kvm_cpu_get_extint);
/*
* Read pending interrupt vector and intack.
@@ -141,9 +142,12 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
if (vector != -1)
return vector; /* PIC */
- return kvm_get_apic_interrupt(v); /* APIC */
+ vector = kvm_apic_has_interrupt(v); /* APIC */
+ if (vector != -1)
+ kvm_apic_ack_interrupt(v, vector);
+
+ return vector;
}
-EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
{
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 5bb481aefcbc..2098dc689088 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1944,7 +1944,7 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
u64 ns = 0;
ktime_t expire;
struct kvm_vcpu *vcpu = apic->vcpu;
- unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
+ u32 this_tsc_khz = vcpu->arch.virtual_tsc_khz;
unsigned long flags;
ktime_t now;
@@ -2453,6 +2453,43 @@ void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
+#define X2APIC_ICR_RESERVED_BITS (GENMASK_ULL(31, 20) | GENMASK_ULL(17, 16) | BIT(13))
+
+int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data)
+{
+ if (data & X2APIC_ICR_RESERVED_BITS)
+ return 1;
+
+ /*
+ * The BUSY bit is reserved on both Intel and AMD in x2APIC mode, but
+ * only AMD requires it to be zero, Intel essentially just ignores the
+ * bit. And if IPI virtualization (Intel) or x2AVIC (AMD) is enabled,
+ * the CPU performs the reserved bits checks, i.e. the underlying CPU
+ * behavior will "win". Arbitrarily clear the BUSY bit, as there is no
+ * sane way to provide consistent behavior with respect to hardware.
+ */
+ data &= ~APIC_ICR_BUSY;
+
+ kvm_apic_send_ipi(apic, (u32)data, (u32)(data >> 32));
+ if (kvm_x86_ops.x2apic_icr_is_split) {
+ kvm_lapic_set_reg(apic, APIC_ICR, data);
+ kvm_lapic_set_reg(apic, APIC_ICR2, data >> 32);
+ } else {
+ kvm_lapic_set_reg64(apic, APIC_ICR, data);
+ }
+ trace_kvm_apic_write(APIC_ICR, data);
+ return 0;
+}
+
+static u64 kvm_x2apic_icr_read(struct kvm_lapic *apic)
+{
+ if (kvm_x86_ops.x2apic_icr_is_split)
+ return (u64)kvm_lapic_get_reg(apic, APIC_ICR) |
+ (u64)kvm_lapic_get_reg(apic, APIC_ICR2) << 32;
+
+ return kvm_lapic_get_reg64(apic, APIC_ICR);
+}
+
/* emulate APIC access in a trap manner */
void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
{
@@ -2470,7 +2507,7 @@ void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
* maybe-unecessary write, and both are in the noise anyways.
*/
if (apic_x2apic_mode(apic) && offset == APIC_ICR)
- kvm_x2apic_icr_write(apic, kvm_lapic_get_reg64(apic, APIC_ICR));
+ WARN_ON_ONCE(kvm_x2apic_icr_write(apic, kvm_x2apic_icr_read(apic)));
else
kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset));
}
@@ -2922,14 +2959,13 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
}
}
-int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
+void kvm_apic_ack_interrupt(struct kvm_vcpu *vcpu, int vector)
{
- int vector = kvm_apic_has_interrupt(vcpu);
struct kvm_lapic *apic = vcpu->arch.apic;
u32 ppr;
- if (vector == -1)
- return -1;
+ if (WARN_ON_ONCE(vector < 0 || !apic))
+ return;
/*
* We get here even with APIC virtualization enabled, if doing
@@ -2957,8 +2993,8 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
__apic_update_ppr(apic, &ppr);
}
- return vector;
}
+EXPORT_SYMBOL_GPL(kvm_apic_ack_interrupt);
static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
struct kvm_lapic_state *s, bool set)
@@ -2990,18 +3026,22 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
/*
* In x2APIC mode, the LDR is fixed and based on the id. And
- * ICR is internally a single 64-bit register, but needs to be
- * split to ICR+ICR2 in userspace for backwards compatibility.
+ * if the ICR is _not_ split, ICR is internally a single 64-bit
+ * register, but needs to be split to ICR+ICR2 in userspace for
+ * backwards compatibility.
*/
- if (set) {
+ if (set)
*ldr = kvm_apic_calc_x2apic_ldr(x2apic_id);
- icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) |
- (u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32;
- __kvm_lapic_set_reg64(s->regs, APIC_ICR, icr);
- } else {
- icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR);
- __kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32);
+ if (!kvm_x86_ops.x2apic_icr_is_split) {
+ if (set) {
+ icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) |
+ (u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32;
+ __kvm_lapic_set_reg64(s->regs, APIC_ICR, icr);
+ } else {
+ icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR);
+ __kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32);
+ }
}
}
@@ -3194,22 +3234,12 @@ int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
return 0;
}
-int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data)
-{
- data &= ~APIC_ICR_BUSY;
-
- kvm_apic_send_ipi(apic, (u32)data, (u32)(data >> 32));
- kvm_lapic_set_reg64(apic, APIC_ICR, data);
- trace_kvm_apic_write(APIC_ICR, data);
- return 0;
-}
-
static int kvm_lapic_msr_read(struct kvm_lapic *apic, u32 reg, u64 *data)
{
u32 low;
if (reg == APIC_ICR) {
- *data = kvm_lapic_get_reg64(apic, APIC_ICR);
+ *data = kvm_x2apic_icr_read(apic);
return 0;
}
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 7ef8ae73e82d..1b8ef9856422 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -88,15 +88,14 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu);
void kvm_free_lapic(struct kvm_vcpu *vcpu);
int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu);
+void kvm_apic_ack_interrupt(struct kvm_vcpu *vcpu, int vector);
int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu);
-int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu);
int kvm_apic_accept_events(struct kvm_vcpu *vcpu);
void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event);
u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu);
void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
-u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
void kvm_recalculate_apic_map(struct kvm *kvm);
void kvm_apic_set_version(struct kvm_vcpu *vcpu);
void kvm_apic_after_set_mcg_cap(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 4341e0e28571..9dc5dd43ae7f 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -223,8 +223,6 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
bool kvm_mmu_may_ignore_guest_pat(void);
-int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu);
-
int kvm_mmu_post_init_vm(struct kvm *kvm);
void kvm_mmu_pre_destroy_vm(struct kvm *kvm);
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 7813d28b082f..e52f990548df 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -614,32 +614,6 @@ static u64 mmu_spte_get_lockless(u64 *sptep)
return __get_spte_lockless(sptep);
}
-/* Returns the Accessed status of the PTE and resets it at the same time. */
-static bool mmu_spte_age(u64 *sptep)
-{
- u64 spte = mmu_spte_get_lockless(sptep);
-
- if (!is_accessed_spte(spte))
- return false;
-
- if (spte_ad_enabled(spte)) {
- clear_bit((ffs(shadow_accessed_mask) - 1),
- (unsigned long *)sptep);
- } else {
- /*
- * Capture the dirty status of the page, so that it doesn't get
- * lost when the SPTE is marked for access tracking.
- */
- if (is_writable_pte(spte))
- kvm_set_pfn_dirty(spte_to_pfn(spte));
-
- spte = mark_spte_for_access_track(spte);
- mmu_spte_update_no_track(sptep, spte);
- }
-
- return true;
-}
-
static inline bool is_tdp_mmu_active(struct kvm_vcpu *vcpu)
{
return tdp_mmu_enabled && vcpu->arch.mmu->root_role.direct;
@@ -938,6 +912,7 @@ static struct kvm_memory_slot *gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu
* in this rmap chain. Otherwise, (rmap_head->val & ~1) points to a struct
* pte_list_desc containing more mappings.
*/
+#define KVM_RMAP_MANY BIT(0)
/*
* Returns the number of pointers in the rmap chain, not counting the new one.
@@ -950,16 +925,16 @@ static int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte,
if (!rmap_head->val) {
rmap_head->val = (unsigned long)spte;
- } else if (!(rmap_head->val & 1)) {
+ } else if (!(rmap_head->val & KVM_RMAP_MANY)) {
desc = kvm_mmu_memory_cache_alloc(cache);
desc->sptes[0] = (u64 *)rmap_head->val;
desc->sptes[1] = spte;
desc->spte_count = 2;
desc->tail_count = 0;
- rmap_head->val = (unsigned long)desc | 1;
+ rmap_head->val = (unsigned long)desc | KVM_RMAP_MANY;
++count;
} else {
- desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
+ desc = (struct pte_list_desc *)(rmap_head->val & ~KVM_RMAP_MANY);
count = desc->tail_count + desc->spte_count;
/*
@@ -968,10 +943,10 @@ static int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte,
*/
if (desc->spte_count == PTE_LIST_EXT) {
desc = kvm_mmu_memory_cache_alloc(cache);
- desc->more = (struct pte_list_desc *)(rmap_head->val & ~1ul);
+ desc->more = (struct pte_list_desc *)(rmap_head->val & ~KVM_RMAP_MANY);
desc->spte_count = 0;
desc->tail_count = count;
- rmap_head->val = (unsigned long)desc | 1;
+ rmap_head->val = (unsigned long)desc | KVM_RMAP_MANY;
}
desc->sptes[desc->spte_count++] = spte;
}
@@ -982,7 +957,7 @@ static void pte_list_desc_remove_entry(struct kvm *kvm,
struct kvm_rmap_head *rmap_head,
struct pte_list_desc *desc, int i)
{
- struct pte_list_desc *head_desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
+ struct pte_list_desc *head_desc = (struct pte_list_desc *)(rmap_head->val & ~KVM_RMAP_MANY);
int j = head_desc->spte_count - 1;
/*
@@ -1011,7 +986,7 @@ static void pte_list_desc_remove_entry(struct kvm *kvm,
if (!head_desc->more)
rmap_head->val = 0;
else
- rmap_head->val = (unsigned long)head_desc->more | 1;
+ rmap_head->val = (unsigned long)head_desc->more | KVM_RMAP_MANY;
mmu_free_pte_list_desc(head_desc);
}
@@ -1024,13 +999,13 @@ static void pte_list_remove(struct kvm *kvm, u64 *spte,
if (KVM_BUG_ON_DATA_CORRUPTION(!rmap_head->val, kvm))
return;
- if (!(rmap_head->val & 1)) {
+ if (!(rmap_head->val & KVM_RMAP_MANY)) {
if (KVM_BUG_ON_DATA_CORRUPTION((u64 *)rmap_head->val != spte, kvm))
return;
rmap_head->val = 0;
} else {
- desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
+ desc = (struct pte_list_desc *)(rmap_head->val & ~KVM_RMAP_MANY);
while (desc) {
for (i = 0; i < desc->spte_count; ++i) {
if (desc->sptes[i] == spte) {
@@ -1063,12 +1038,12 @@ static bool kvm_zap_all_rmap_sptes(struct kvm *kvm,
if (!rmap_head->val)
return false;
- if (!(rmap_head->val & 1)) {
+ if (!(rmap_head->val & KVM_RMAP_MANY)) {
mmu_spte_clear_track_bits(kvm, (u64 *)rmap_head->val);
goto out;
}
- desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
+ desc = (struct pte_list_desc *)(rmap_head->val & ~KVM_RMAP_MANY);
for (; desc; desc = next) {
for (i = 0; i < desc->spte_count; i++)
@@ -1088,10 +1063,10 @@ unsigned int pte_list_count(struct kvm_rmap_head *rmap_head)
if (!rmap_head->val)
return 0;
- else if (!(rmap_head->val & 1))
+ else if (!(rmap_head->val & KVM_RMAP_MANY))
return 1;
- desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
+ desc = (struct pte_list_desc *)(rmap_head->val & ~KVM_RMAP_MANY);
return desc->tail_count + desc->spte_count;
}
@@ -1153,13 +1128,13 @@ static u64 *rmap_get_first(struct kvm_rmap_head *rmap_head,
if (!rmap_head->val)
return NULL;
- if (!(rmap_head->val & 1)) {
+ if (!(rmap_head->val & KVM_RMAP_MANY)) {
iter->desc = NULL;
sptep = (u64 *)rmap_head->val;
goto out;
}
- iter->desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
+ iter->desc = (struct pte_list_desc *)(rmap_head->val & ~KVM_RMAP_MANY);
iter->pos = 0;
sptep = iter->desc->sptes[iter->pos];
out:
@@ -1307,15 +1282,6 @@ static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
return flush;
}
-/**
- * kvm_mmu_write_protect_pt_masked - write protect selected PT level pages
- * @kvm: kvm instance
- * @slot: slot to protect
- * @gfn_offset: start of the BITS_PER_LONG pages we care about
- * @mask: indicates which pages we should protect
- *
- * Used when we do not need to care about huge page mappings.
- */
static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
struct kvm_memory_slot *slot,
gfn_t gfn_offset, unsigned long mask)
@@ -1339,16 +1305,6 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
}
}
-/**
- * kvm_mmu_clear_dirty_pt_masked - clear MMU D-bit for PT level pages, or write
- * protect the page if the D-bit isn't supported.
- * @kvm: kvm instance
- * @slot: slot to clear D-bit
- * @gfn_offset: start of the BITS_PER_LONG pages we care about
- * @mask: indicates which pages we should clear D-bit
- *
- * Used for PML to re-log the dirty GPAs after userspace querying dirty_bitmap.
- */
static void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
struct kvm_memory_slot *slot,
gfn_t gfn_offset, unsigned long mask)
@@ -1372,24 +1328,16 @@ static void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
}
}
-/**
- * kvm_arch_mmu_enable_log_dirty_pt_masked - enable dirty logging for selected
- * PT level pages.
- *
- * It calls kvm_mmu_write_protect_pt_masked to write protect selected pages to
- * enable dirty logging for them.
- *
- * We need to care about huge page mappings: e.g. during dirty logging we may
- * have such mappings.
- */
void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
struct kvm_memory_slot *slot,
gfn_t gfn_offset, unsigned long mask)
{
/*
- * Huge pages are NOT write protected when we start dirty logging in
- * initially-all-set mode; must write protect them here so that they
- * are split to 4K on the first write.
+ * If the slot was assumed to be "initially all dirty", write-protect
+ * huge pages to ensure they are split to 4KiB on the first write (KVM
+ * dirty logs at 4KiB granularity). If eager page splitting is enabled,
+ * immediately try to split huge pages, e.g. so that vCPUs don't get
+ * saddled with the cost of splitting.
*
* The gfn_offset is guaranteed to be aligned to 64, but the base_gfn
* of memslot has no such restriction, so the range can cross two large
@@ -1411,7 +1359,16 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
PG_LEVEL_2M);
}
- /* Now handle 4K PTEs. */
+ /*
+ * (Re)Enable dirty logging for all 4KiB SPTEs that map the GFNs in
+ * mask. If PML is enabled and the GFN doesn't need to be write-
+ * protected for other reasons, e.g. shadow paging, clear the Dirty bit.
+ * Otherwise clear the Writable bit.
+ *
+ * Note that kvm_mmu_clear_dirty_pt_masked() is called whenever PML is
+ * enabled but it chooses between clearing the Dirty bit and Writeable
+ * bit based on the context.
+ */
if (kvm_x86_ops.cpu_dirty_log_size)
kvm_mmu_clear_dirty_pt_masked(kvm, slot, gfn_offset, mask);
else
@@ -1453,16 +1410,10 @@ static bool kvm_vcpu_write_protect_gfn(struct kvm_vcpu *vcpu, u64 gfn)
return kvm_mmu_slot_gfn_write_protect(vcpu->kvm, slot, gfn, PG_LEVEL_4K);
}
-static bool __kvm_zap_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
- const struct kvm_memory_slot *slot)
-{
- return kvm_zap_all_rmap_sptes(kvm, rmap_head);
-}
-
static bool kvm_zap_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
- struct kvm_memory_slot *slot, gfn_t gfn, int level)
+ const struct kvm_memory_slot *slot)
{
- return __kvm_zap_rmap(kvm, rmap_head, slot);
+ return kvm_zap_all_rmap_sptes(kvm, rmap_head);
}
struct slot_rmap_walk_iterator {
@@ -1513,7 +1464,7 @@ static bool slot_rmap_walk_okay(struct slot_rmap_walk_iterator *iterator)
static void slot_rmap_walk_next(struct slot_rmap_walk_iterator *iterator)
{
while (++iterator->rmap <= iterator->end_rmap) {
- iterator->gfn += (1UL << KVM_HPAGE_GFN_SHIFT(iterator->level));
+ iterator->gfn += KVM_PAGES_PER_HPAGE(iterator->level);
if (iterator->rmap->val)
return;
@@ -1534,23 +1485,71 @@ static void slot_rmap_walk_next(struct slot_rmap_walk_iterator *iterator)
slot_rmap_walk_okay(_iter_); \
slot_rmap_walk_next(_iter_))
-typedef bool (*rmap_handler_t)(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
- struct kvm_memory_slot *slot, gfn_t gfn,
- int level);
+/* The return value indicates if tlb flush on all vcpus is needed. */
+typedef bool (*slot_rmaps_handler) (struct kvm *kvm,
+ struct kvm_rmap_head *rmap_head,
+ const struct kvm_memory_slot *slot);
-static __always_inline bool kvm_handle_gfn_range(struct kvm *kvm,
- struct kvm_gfn_range *range,
- rmap_handler_t handler)
+static __always_inline bool __walk_slot_rmaps(struct kvm *kvm,
+ const struct kvm_memory_slot *slot,
+ slot_rmaps_handler fn,
+ int start_level, int end_level,
+ gfn_t start_gfn, gfn_t end_gfn,
+ bool can_yield, bool flush_on_yield,
+ bool flush)
{
struct slot_rmap_walk_iterator iterator;
- bool ret = false;
- for_each_slot_rmap_range(range->slot, PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL,
- range->start, range->end - 1, &iterator)
- ret |= handler(kvm, iterator.rmap, range->slot, iterator.gfn,
- iterator.level);
+ lockdep_assert_held_write(&kvm->mmu_lock);
- return ret;
+ for_each_slot_rmap_range(slot, start_level, end_level, start_gfn,
+ end_gfn, &iterator) {
+ if (iterator.rmap)
+ flush |= fn(kvm, iterator.rmap, slot);
+
+ if (!can_yield)
+ continue;
+
+ if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
+ if (flush && flush_on_yield) {
+ kvm_flush_remote_tlbs_range(kvm, start_gfn,
+ iterator.gfn - start_gfn + 1);
+ flush = false;
+ }
+ cond_resched_rwlock_write(&kvm->mmu_lock);
+ }
+ }
+
+ return flush;
+}
+
+static __always_inline bool walk_slot_rmaps(struct kvm *kvm,
+ const struct kvm_memory_slot *slot,
+ slot_rmaps_handler fn,
+ int start_level, int end_level,
+ bool flush_on_yield)
+{
+ return __walk_slot_rmaps(kvm, slot, fn, start_level, end_level,
+ slot->base_gfn, slot->base_gfn + slot->npages - 1,
+ true, flush_on_yield, false);
+}
+
+static __always_inline bool walk_slot_rmaps_4k(struct kvm *kvm,
+ const struct kvm_memory_slot *slot,
+ slot_rmaps_handler fn,
+ bool flush_on_yield)
+{
+ return walk_slot_rmaps(kvm, slot, fn, PG_LEVEL_4K, PG_LEVEL_4K, flush_on_yield);
+}
+
+static bool __kvm_rmap_zap_gfn_range(struct kvm *kvm,
+ const struct kvm_memory_slot *slot,
+ gfn_t start, gfn_t end, bool can_yield,
+ bool flush)
+{
+ return __walk_slot_rmaps(kvm, slot, kvm_zap_rmap,
+ PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL,
+ start, end - 1, can_yield, true, flush);
}
bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
@@ -1558,7 +1557,9 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
bool flush = false;
if (kvm_memslots_have_rmaps(kvm))
- flush = kvm_handle_gfn_range(kvm, range, kvm_zap_rmap);
+ flush = __kvm_rmap_zap_gfn_range(kvm, range->slot,
+ range->start, range->end,
+ range->may_block, flush);
if (tdp_mmu_enabled)
flush = kvm_tdp_mmu_unmap_gfn_range(kvm, range, flush);
@@ -1570,31 +1571,6 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
return flush;
}
-static bool kvm_age_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
- struct kvm_memory_slot *slot, gfn_t gfn, int level)
-{
- u64 *sptep;
- struct rmap_iterator iter;
- int young = 0;
-
- for_each_rmap_spte(rmap_head, &iter, sptep)
- young |= mmu_spte_age(sptep);
-
- return young;
-}
-
-static bool kvm_test_age_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
- struct kvm_memory_slot *slot, gfn_t gfn, int level)
-{
- u64 *sptep;
- struct rmap_iterator iter;
-
- for_each_rmap_spte(rmap_head, &iter, sptep)
- if (is_accessed_spte(*sptep))
- return true;
- return false;
-}
-
#define RMAP_RECYCLE_THRESHOLD 1000
static void __rmap_add(struct kvm *kvm,
@@ -1629,12 +1605,52 @@ static void rmap_add(struct kvm_vcpu *vcpu, const struct kvm_memory_slot *slot,
__rmap_add(vcpu->kvm, cache, slot, spte, gfn, access);
}
+static bool kvm_rmap_age_gfn_range(struct kvm *kvm,
+ struct kvm_gfn_range *range, bool test_only)
+{
+ struct slot_rmap_walk_iterator iterator;
+ struct rmap_iterator iter;
+ bool young = false;
+ u64 *sptep;
+
+ for_each_slot_rmap_range(range->slot, PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL,
+ range->start, range->end - 1, &iterator) {
+ for_each_rmap_spte(iterator.rmap, &iter, sptep) {
+ u64 spte = *sptep;
+
+ if (!is_accessed_spte(spte))
+ continue;
+
+ if (test_only)
+ return true;
+
+ if (spte_ad_enabled(spte)) {
+ clear_bit((ffs(shadow_accessed_mask) - 1),
+ (unsigned long *)sptep);
+ } else {
+ /*
+ * Capture the dirty status of the page, so that
+ * it doesn't get lost when the SPTE is marked
+ * for access tracking.
+ */
+ if (is_writable_pte(spte))
+ kvm_set_pfn_dirty(spte_to_pfn(spte));
+
+ spte = mark_spte_for_access_track(spte);
+ mmu_spte_update_no_track(sptep, spte);
+ }
+ young = true;
+ }
+ }
+ return young;
+}
+
bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
bool young = false;
if (kvm_memslots_have_rmaps(kvm))
- young = kvm_handle_gfn_range(kvm, range, kvm_age_rmap);
+ young = kvm_rmap_age_gfn_range(kvm, range, false);
if (tdp_mmu_enabled)
young |= kvm_tdp_mmu_age_gfn_range(kvm, range);
@@ -1647,7 +1663,7 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
bool young = false;
if (kvm_memslots_have_rmaps(kvm))
- young = kvm_handle_gfn_range(kvm, range, kvm_test_age_rmap);
+ young = kvm_rmap_age_gfn_range(kvm, range, true);
if (tdp_mmu_enabled)
young |= kvm_tdp_mmu_test_age_gfn(kvm, range);
@@ -2713,36 +2729,49 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long goal_nr_mmu_pages)
write_unlock(&kvm->mmu_lock);
}
-int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
+bool __kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
+ bool always_retry)
{
- struct kvm_mmu_page *sp;
+ struct kvm *kvm = vcpu->kvm;
LIST_HEAD(invalid_list);
- int r;
+ struct kvm_mmu_page *sp;
+ gpa_t gpa = cr2_or_gpa;
+ bool r = false;
+
+ /*
+ * Bail early if there aren't any write-protected shadow pages to avoid
+ * unnecessarily taking mmu_lock lock, e.g. if the gfn is write-tracked
+ * by a third party. Reading indirect_shadow_pages without holding
+ * mmu_lock is safe, as this is purely an optimization, i.e. a false
+ * positive is benign, and a false negative will simply result in KVM
+ * skipping the unprotect+retry path, which is also an optimization.
+ */
+ if (!READ_ONCE(kvm->arch.indirect_shadow_pages))
+ goto out;
+
+ if (!vcpu->arch.mmu->root_role.direct) {
+ gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL);
+ if (gpa == INVALID_GPA)
+ goto out;
+ }
- r = 0;
write_lock(&kvm->mmu_lock);
- for_each_gfn_valid_sp_with_gptes(kvm, sp, gfn) {
- r = 1;
+ for_each_gfn_valid_sp_with_gptes(kvm, sp, gpa_to_gfn(gpa))
kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
- }
+
+ /*
+ * Snapshot the result before zapping, as zapping will remove all list
+ * entries, i.e. checking the list later would yield a false negative.
+ */
+ r = !list_empty(&invalid_list);
kvm_mmu_commit_zap_page(kvm, &invalid_list);
write_unlock(&kvm->mmu_lock);
- return r;
-}
-
-static int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
-{
- gpa_t gpa;
- int r;
-
- if (vcpu->arch.mmu->root_role.direct)
- return 0;
-
- gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL);
-
- r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
-
+out:
+ if (r || always_retry) {
+ vcpu->arch.last_retry_eip = kvm_rip_read(vcpu);
+ vcpu->arch.last_retry_addr = cr2_or_gpa;
+ }
return r;
}
@@ -2914,10 +2943,8 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
trace_kvm_mmu_set_spte(level, gfn, sptep);
}
- if (wrprot) {
- if (write_fault)
- ret = RET_PF_EMULATE;
- }
+ if (wrprot && write_fault)
+ ret = RET_PF_WRITE_PROTECTED;
if (flush)
kvm_flush_remote_tlbs_gfn(vcpu->kvm, gfn, level);
@@ -4549,7 +4576,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
return RET_PF_RETRY;
if (page_fault_handle_page_track(vcpu, fault))
- return RET_PF_EMULATE;
+ return RET_PF_WRITE_PROTECTED;
r = fast_page_fault(vcpu, fault);
if (r != RET_PF_INVALID)
@@ -4618,8 +4645,6 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
if (!flags) {
trace_kvm_page_fault(vcpu, fault_address, error_code);
- if (kvm_event_needs_reinjection(vcpu))
- kvm_mmu_unprotect_page_virt(vcpu, fault_address);
r = kvm_mmu_page_fault(vcpu, fault_address, error_code, insn,
insn_len);
} else if (flags & KVM_PV_REASON_PAGE_NOT_PRESENT) {
@@ -4642,7 +4667,7 @@ static int kvm_tdp_mmu_page_fault(struct kvm_vcpu *vcpu,
int r;
if (page_fault_handle_page_track(vcpu, fault))
- return RET_PF_EMULATE;
+ return RET_PF_WRITE_PROTECTED;
r = fast_page_fault(vcpu, fault);
if (r != RET_PF_INVALID)
@@ -4719,6 +4744,7 @@ static int kvm_tdp_map_page(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code,
switch (r) {
case RET_PF_FIXED:
case RET_PF_SPURIOUS:
+ case RET_PF_WRITE_PROTECTED:
return 0;
case RET_PF_EMULATE:
@@ -5963,6 +5989,106 @@ void kvm_mmu_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
write_unlock(&vcpu->kvm->mmu_lock);
}
+static bool is_write_to_guest_page_table(u64 error_code)
+{
+ const u64 mask = PFERR_GUEST_PAGE_MASK | PFERR_WRITE_MASK | PFERR_PRESENT_MASK;
+
+ return (error_code & mask) == mask;
+}
+
+static int kvm_mmu_write_protect_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
+ u64 error_code, int *emulation_type)
+{
+ bool direct = vcpu->arch.mmu->root_role.direct;
+
+ /*
+ * Do not try to unprotect and retry if the vCPU re-faulted on the same
+ * RIP with the same address that was previously unprotected, as doing
+ * so will likely put the vCPU into an infinite. E.g. if the vCPU uses
+ * a non-page-table modifying instruction on the PDE that points to the
+ * instruction, then unprotecting the gfn will unmap the instruction's
+ * code, i.e. make it impossible for the instruction to ever complete.
+ */
+ if (vcpu->arch.last_retry_eip == kvm_rip_read(vcpu) &&
+ vcpu->arch.last_retry_addr == cr2_or_gpa)
+ return RET_PF_EMULATE;
+
+ /*
+ * Reset the unprotect+retry values that guard against infinite loops.
+ * The values will be refreshed if KVM explicitly unprotects a gfn and
+ * retries, in all other cases it's safe to retry in the future even if
+ * the next page fault happens on the same RIP+address.
+ */
+ vcpu->arch.last_retry_eip = 0;
+ vcpu->arch.last_retry_addr = 0;
+
+ /*
+ * It should be impossible to reach this point with an MMIO cache hit,
+ * as RET_PF_WRITE_PROTECTED is returned if and only if there's a valid,
+ * writable memslot, and creating a memslot should invalidate the MMIO
+ * cache by way of changing the memslot generation. WARN and disallow
+ * retry if MMIO is detected, as retrying MMIO emulation is pointless
+ * and could put the vCPU into an infinite loop because the processor
+ * will keep faulting on the non-existent MMIO address.
+ */
+ if (WARN_ON_ONCE(mmio_info_in_cache(vcpu, cr2_or_gpa, direct)))
+ return RET_PF_EMULATE;
+
+ /*
+ * Before emulating the instruction, check to see if the access was due
+ * to a read-only violation while the CPU was walking non-nested NPT
+ * page tables, i.e. for a direct MMU, for _guest_ page tables in L1.
+ * If L1 is sharing (a subset of) its page tables with L2, e.g. by
+ * having nCR3 share lower level page tables with hCR3, then when KVM
+ * (L0) write-protects the nested NPTs, i.e. npt12 entries, KVM is also
+ * unknowingly write-protecting L1's guest page tables, which KVM isn't
+ * shadowing.
+ *
+ * Because the CPU (by default) walks NPT page tables using a write
+ * access (to ensure the CPU can do A/D updates), page walks in L1 can
+ * trigger write faults for the above case even when L1 isn't modifying
+ * PTEs. As a result, KVM will unnecessarily emulate (or at least, try
+ * to emulate) an excessive number of L1 instructions; because L1's MMU
+ * isn't shadowed by KVM, there is no need to write-protect L1's gPTEs
+ * and thus no need to emulate in order to guarantee forward progress.
+ *
+ * Try to unprotect the gfn, i.e. zap any shadow pages, so that L1 can
+ * proceed without triggering emulation. If one or more shadow pages
+ * was zapped, skip emulation and resume L1 to let it natively execute
+ * the instruction. If no shadow pages were zapped, then the write-
+ * fault is due to something else entirely, i.e. KVM needs to emulate,
+ * as resuming the guest will put it into an infinite loop.
+ *
+ * Note, this code also applies to Intel CPUs, even though it is *very*
+ * unlikely that an L1 will share its page tables (IA32/PAE/paging64
+ * format) with L2's page tables (EPT format).
+ *
+ * For indirect MMUs, i.e. if KVM is shadowing the current MMU, try to
+ * unprotect the gfn and retry if an event is awaiting reinjection. If
+ * KVM emulates multiple instructions before completing event injection,
+ * the event could be delayed beyond what is architecturally allowed,
+ * e.g. KVM could inject an IRQ after the TPR has been raised.
+ */
+ if (((direct && is_write_to_guest_page_table(error_code)) ||
+ (!direct && kvm_event_needs_reinjection(vcpu))) &&
+ kvm_mmu_unprotect_gfn_and_retry(vcpu, cr2_or_gpa))
+ return RET_PF_RETRY;
+
+ /*
+ * The gfn is write-protected, but if KVM detects its emulating an
+ * instruction that is unlikely to be used to modify page tables, or if
+ * emulation fails, KVM can try to unprotect the gfn and let the CPU
+ * re-execute the instruction that caused the page fault. Do not allow
+ * retrying an instruction from a nested guest as KVM is only explicitly
+ * shadowing L1's page tables, i.e. unprotecting something for L1 isn't
+ * going to magically fix whatever issue caused L2 to fail.
+ */
+ if (!is_guest_mode(vcpu))
+ *emulation_type |= EMULTYPE_ALLOW_RETRY_PF;
+
+ return RET_PF_EMULATE;
+}
+
int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
void *insn, int insn_len)
{
@@ -6008,6 +6134,10 @@ int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 err
if (r < 0)
return r;
+ if (r == RET_PF_WRITE_PROTECTED)
+ r = kvm_mmu_write_protect_fault(vcpu, cr2_or_gpa, error_code,
+ &emulation_type);
+
if (r == RET_PF_FIXED)
vcpu->stat.pf_fixed++;
else if (r == RET_PF_EMULATE)
@@ -6018,32 +6148,6 @@ int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 err
if (r != RET_PF_EMULATE)
return 1;
- /*
- * Before emulating the instruction, check if the error code
- * was due to a RO violation while translating the guest page.
- * This can occur when using nested virtualization with nested
- * paging in both guests. If true, we simply unprotect the page
- * and resume the guest.
- */
- if (vcpu->arch.mmu->root_role.direct &&
- (error_code & PFERR_NESTED_GUEST_PAGE) == PFERR_NESTED_GUEST_PAGE) {
- kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2_or_gpa));
- return 1;
- }
-
- /*
- * vcpu->arch.mmu.page_fault returned RET_PF_EMULATE, but we can still
- * optimistically try to just unprotect the page and let the processor
- * re-execute the instruction that caused the page fault. Do not allow
- * retrying MMIO emulation, as it's not only pointless but could also
- * cause us to enter an infinite loop because the processor will keep
- * faulting on the non-existent MMIO address. Retrying an instruction
- * from a nested guest is also pointless and dangerous as we are only
- * explicitly shadowing L1's page tables, i.e. unprotecting something
- * for L1 isn't going to magically fix whatever issue cause L2 to fail.
- */
- if (!mmio_info_in_cache(vcpu, cr2_or_gpa, direct) && !is_guest_mode(vcpu))
- emulation_type |= EMULTYPE_ALLOW_RETRY_PF;
emulate:
return x86_emulate_instruction(vcpu, cr2_or_gpa, emulation_type, insn,
insn_len);
@@ -6202,59 +6306,6 @@ void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level,
}
EXPORT_SYMBOL_GPL(kvm_configure_mmu);
-/* The return value indicates if tlb flush on all vcpus is needed. */
-typedef bool (*slot_rmaps_handler) (struct kvm *kvm,
- struct kvm_rmap_head *rmap_head,
- const struct kvm_memory_slot *slot);
-
-static __always_inline bool __walk_slot_rmaps(struct kvm *kvm,
- const struct kvm_memory_slot *slot,
- slot_rmaps_handler fn,
- int start_level, int end_level,
- gfn_t start_gfn, gfn_t end_gfn,
- bool flush_on_yield, bool flush)
-{
- struct slot_rmap_walk_iterator iterator;
-
- lockdep_assert_held_write(&kvm->mmu_lock);
-
- for_each_slot_rmap_range(slot, start_level, end_level, start_gfn,
- end_gfn, &iterator) {
- if (iterator.rmap)
- flush |= fn(kvm, iterator.rmap, slot);
-
- if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
- if (flush && flush_on_yield) {
- kvm_flush_remote_tlbs_range(kvm, start_gfn,
- iterator.gfn - start_gfn + 1);
- flush = false;
- }
- cond_resched_rwlock_write(&kvm->mmu_lock);
- }
- }
-
- return flush;
-}
-
-static __always_inline bool walk_slot_rmaps(struct kvm *kvm,
- const struct kvm_memory_slot *slot,
- slot_rmaps_handler fn,
- int start_level, int end_level,
- bool flush_on_yield)
-{
- return __walk_slot_rmaps(kvm, slot, fn, start_level, end_level,
- slot->base_gfn, slot->base_gfn + slot->npages - 1,
- flush_on_yield, false);
-}
-
-static __always_inline bool walk_slot_rmaps_4k(struct kvm *kvm,
- const struct kvm_memory_slot *slot,
- slot_rmaps_handler fn,
- bool flush_on_yield)
-{
- return walk_slot_rmaps(kvm, slot, fn, PG_LEVEL_4K, PG_LEVEL_4K, flush_on_yield);
-}
-
static void free_mmu_pages(struct kvm_mmu *mmu)
{
if (!tdp_enabled && mmu->pae_root)
@@ -6528,9 +6579,8 @@ static bool kvm_rmap_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_e
if (WARN_ON_ONCE(start >= end))
continue;
- flush = __walk_slot_rmaps(kvm, memslot, __kvm_zap_rmap,
- PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL,
- start, end - 1, true, flush);
+ flush = __kvm_rmap_zap_gfn_range(kvm, memslot, start,
+ end, true, flush);
}
}
@@ -6818,7 +6868,7 @@ static void kvm_shadow_mmu_try_split_huge_pages(struct kvm *kvm,
*/
for (level = KVM_MAX_HUGEPAGE_LEVEL; level > target_level; level--)
__walk_slot_rmaps(kvm, slot, shadow_mmu_try_split_huge_pages,
- level, level, start, end - 1, true, false);
+ level, level, start, end - 1, true, true, false);
}
/* Must be called with the mmu_lock held in write-mode. */
@@ -6997,10 +7047,42 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm)
kvm_mmu_zap_all(kvm);
}
+/*
+ * Zapping leaf SPTEs with memslot range when a memslot is moved/deleted.
+ *
+ * Zapping non-leaf SPTEs, a.k.a. not-last SPTEs, isn't required, worst
+ * case scenario we'll have unused shadow pages lying around until they
+ * are recycled due to age or when the VM is destroyed.
+ */
+static void kvm_mmu_zap_memslot_leafs(struct kvm *kvm, struct kvm_memory_slot *slot)
+{
+ struct kvm_gfn_range range = {
+ .slot = slot,
+ .start = slot->base_gfn,
+ .end = slot->base_gfn + slot->npages,
+ .may_block = true,
+ };
+
+ write_lock(&kvm->mmu_lock);
+ if (kvm_unmap_gfn_range(kvm, &range))
+ kvm_flush_remote_tlbs_memslot(kvm, slot);
+
+ write_unlock(&kvm->mmu_lock);
+}
+
+static inline bool kvm_memslot_flush_zap_all(struct kvm *kvm)
+{
+ return kvm->arch.vm_type == KVM_X86_DEFAULT_VM &&
+ kvm_check_has_quirk(kvm, KVM_X86_QUIRK_SLOT_ZAP_ALL);
+}
+
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot)
{
- kvm_mmu_zap_all_fast(kvm);
+ if (kvm_memslot_flush_zap_all(kvm))
+ kvm_mmu_zap_all_fast(kvm);
+ else
+ kvm_mmu_zap_memslot_leafs(kvm, slot);
}
void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen)
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index 1721d97743e9..c98827840e07 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -258,6 +258,8 @@ int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault);
* RET_PF_CONTINUE: So far, so good, keep handling the page fault.
* RET_PF_RETRY: let CPU fault again on the address.
* RET_PF_EMULATE: mmio page fault, emulate the instruction directly.
+ * RET_PF_WRITE_PROTECTED: the gfn is write-protected, either unprotected the
+ * gfn and retry, or emulate the instruction directly.
* RET_PF_INVALID: the spte is invalid, let the real page fault path update it.
* RET_PF_FIXED: The faulting entry has been fixed.
* RET_PF_SPURIOUS: The faulting entry was already fixed, e.g. by another vCPU.
@@ -274,6 +276,7 @@ enum {
RET_PF_CONTINUE = 0,
RET_PF_RETRY,
RET_PF_EMULATE,
+ RET_PF_WRITE_PROTECTED,
RET_PF_INVALID,
RET_PF_FIXED,
RET_PF_SPURIOUS,
@@ -349,8 +352,6 @@ int kvm_mmu_max_mapping_level(struct kvm *kvm,
void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault);
void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_level);
-void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
-
void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp);
void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp);
diff --git a/arch/x86/kvm/mmu/mmutrace.h b/arch/x86/kvm/mmu/mmutrace.h
index 195d98bc8de8..f35a830ce469 100644
--- a/arch/x86/kvm/mmu/mmutrace.h
+++ b/arch/x86/kvm/mmu/mmutrace.h
@@ -57,6 +57,7 @@
TRACE_DEFINE_ENUM(RET_PF_CONTINUE);
TRACE_DEFINE_ENUM(RET_PF_RETRY);
TRACE_DEFINE_ENUM(RET_PF_EMULATE);
+TRACE_DEFINE_ENUM(RET_PF_WRITE_PROTECTED);
TRACE_DEFINE_ENUM(RET_PF_INVALID);
TRACE_DEFINE_ENUM(RET_PF_FIXED);
TRACE_DEFINE_ENUM(RET_PF_SPURIOUS);
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 69941cebb3a8..ae7d39ff2d07 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -646,10 +646,10 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
* really care if it changes underneath us after this point).
*/
if (FNAME(gpte_changed)(vcpu, gw, top_level))
- goto out_gpte_changed;
+ return RET_PF_RETRY;
if (WARN_ON_ONCE(!VALID_PAGE(vcpu->arch.mmu->root.hpa)))
- goto out_gpte_changed;
+ return RET_PF_RETRY;
/*
* Load a new root and retry the faulting instruction in the extremely
@@ -659,7 +659,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
*/
if (unlikely(kvm_mmu_is_dummy_root(vcpu->arch.mmu->root.hpa))) {
kvm_make_request(KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, vcpu);
- goto out_gpte_changed;
+ return RET_PF_RETRY;
}
for_each_shadow_entry(vcpu, fault->addr, it) {
@@ -674,34 +674,38 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
sp = kvm_mmu_get_child_sp(vcpu, it.sptep, table_gfn,
false, access);
- if (sp != ERR_PTR(-EEXIST)) {
- /*
- * We must synchronize the pagetable before linking it
- * because the guest doesn't need to flush tlb when
- * the gpte is changed from non-present to present.
- * Otherwise, the guest may use the wrong mapping.
- *
- * For PG_LEVEL_4K, kvm_mmu_get_page() has already
- * synchronized it transiently via kvm_sync_page().
- *
- * For higher level pagetable, we synchronize it via
- * the slower mmu_sync_children(). If it needs to
- * break, some progress has been made; return
- * RET_PF_RETRY and retry on the next #PF.
- * KVM_REQ_MMU_SYNC is not necessary but it
- * expedites the process.
- */
- if (sp->unsync_children &&
- mmu_sync_children(vcpu, sp, false))
- return RET_PF_RETRY;
- }
+ /*
+ * Synchronize the new page before linking it, as the CPU (KVM)
+ * is architecturally disallowed from inserting non-present
+ * entries into the TLB, i.e. the guest isn't required to flush
+ * the TLB when changing the gPTE from non-present to present.
+ *
+ * For PG_LEVEL_4K, kvm_mmu_find_shadow_page() has already
+ * synchronized the page via kvm_sync_page().
+ *
+ * For higher level pages, which cannot be unsync themselves
+ * but can have unsync children, synchronize via the slower
+ * mmu_sync_children(). If KVM needs to drop mmu_lock due to
+ * contention or to reschedule, instruct the caller to retry
+ * the #PF (mmu_sync_children() ensures forward progress will
+ * be made).
+ */
+ if (sp != ERR_PTR(-EEXIST) && sp->unsync_children &&
+ mmu_sync_children(vcpu, sp, false))
+ return RET_PF_RETRY;
/*
- * Verify that the gpte in the page we've just write
- * protected is still there.
+ * Verify that the gpte in the page, which is now either
+ * write-protected or unsync, wasn't modified between the fault
+ * and acquiring mmu_lock. This needs to be done even when
+ * reusing an existing shadow page to ensure the information
+ * gathered by the walker matches the information stored in the
+ * shadow page (which could have been modified by a different
+ * vCPU even if the page was already linked). Holding mmu_lock
+ * prevents the shadow page from changing after this point.
*/
if (FNAME(gpte_changed)(vcpu, gw, it.level - 1))
- goto out_gpte_changed;
+ return RET_PF_RETRY;
if (sp != ERR_PTR(-EEXIST))
link_shadow_page(vcpu, it.sptep, sp);
@@ -755,9 +759,6 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
FNAME(pte_prefetch)(vcpu, gw, it.sptep);
return ret;
-
-out_gpte_changed:
- return RET_PF_RETRY;
}
/*
@@ -805,7 +806,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
if (page_fault_handle_page_track(vcpu, fault)) {
shadow_page_table_clear_flood(vcpu, fault->addr);
- return RET_PF_EMULATE;
+ return RET_PF_WRITE_PROTECTED;
}
r = mmu_topup_memory_caches(vcpu, true);
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 3c55955bcaf8..3b996c1fdaab 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1046,10 +1046,8 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu,
* protected, emulation is needed. If the emulation was skipped,
* the vCPU would have the same fault again.
*/
- if (wrprot) {
- if (fault->write)
- ret = RET_PF_EMULATE;
- }
+ if (wrprot && fault->write)
+ ret = RET_PF_WRITE_PROTECTED;
/* If a MMIO SPTE is installed, the MMIO will need to be emulated. */
if (unlikely(is_mmio_spte(vcpu->kvm, new_spte))) {
diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h
index 2f4e155080ba..0d17d6b70639 100644
--- a/arch/x86/kvm/reverse_cpuid.h
+++ b/arch/x86/kvm/reverse_cpuid.h
@@ -17,6 +17,7 @@ enum kvm_only_cpuid_leafs {
CPUID_8000_0007_EDX,
CPUID_8000_0022_EAX,
CPUID_7_2_EDX,
+ CPUID_24_0_EBX,
NR_KVM_CPU_CAPS,
NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS,
@@ -46,6 +47,7 @@ enum kvm_only_cpuid_leafs {
#define X86_FEATURE_AVX_NE_CONVERT KVM_X86_FEATURE(CPUID_7_1_EDX, 5)
#define X86_FEATURE_AMX_COMPLEX KVM_X86_FEATURE(CPUID_7_1_EDX, 8)
#define X86_FEATURE_PREFETCHITI KVM_X86_FEATURE(CPUID_7_1_EDX, 14)
+#define X86_FEATURE_AVX10 KVM_X86_FEATURE(CPUID_7_1_EDX, 19)
/* Intel-defined sub-features, CPUID level 0x00000007:2 (EDX) */
#define X86_FEATURE_INTEL_PSFD KVM_X86_FEATURE(CPUID_7_2_EDX, 0)
@@ -55,6 +57,11 @@ enum kvm_only_cpuid_leafs {
#define KVM_X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4)
#define X86_FEATURE_MCDT_NO KVM_X86_FEATURE(CPUID_7_2_EDX, 5)
+/* Intel-defined sub-features, CPUID level 0x00000024:0 (EBX) */
+#define X86_FEATURE_AVX10_128 KVM_X86_FEATURE(CPUID_24_0_EBX, 16)
+#define X86_FEATURE_AVX10_256 KVM_X86_FEATURE(CPUID_24_0_EBX, 17)
+#define X86_FEATURE_AVX10_512 KVM_X86_FEATURE(CPUID_24_0_EBX, 18)
+
/* CPUID level 0x80000007 (EDX). */
#define KVM_X86_FEATURE_CONSTANT_TSC KVM_X86_FEATURE(CPUID_8000_0007_EDX, 8)
@@ -90,6 +97,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
[CPUID_8000_0021_EAX] = {0x80000021, 0, CPUID_EAX},
[CPUID_8000_0022_EAX] = {0x80000022, 0, CPUID_EAX},
[CPUID_7_2_EDX] = { 7, 2, CPUID_EDX},
+ [CPUID_24_0_EBX] = { 0x24, 0, CPUID_EBX},
};
/*
diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c
index 00e3c27d2a87..85241c0c7f56 100644
--- a/arch/x86/kvm/smm.c
+++ b/arch/x86/kvm/smm.c
@@ -624,17 +624,31 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
#endif
/*
- * Give leave_smm() a chance to make ISA-specific changes to the vCPU
- * state (e.g. enter guest mode) before loading state from the SMM
- * state-save area.
+ * FIXME: When resuming L2 (a.k.a. guest mode), the transition to guest
+ * mode should happen _after_ loading state from SMRAM. However, KVM
+ * piggybacks the nested VM-Enter flows (which is wrong for many other
+ * reasons), and so nSVM/nVMX would clobber state that is loaded from
+ * SMRAM and from the VMCS/VMCB.
*/
if (kvm_x86_call(leave_smm)(vcpu, &smram))
return X86EMUL_UNHANDLEABLE;
#ifdef CONFIG_X86_64
if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
- return rsm_load_state_64(ctxt, &smram.smram64);
+ ret = rsm_load_state_64(ctxt, &smram.smram64);
else
#endif
- return rsm_load_state_32(ctxt, &smram.smram32);
+ ret = rsm_load_state_32(ctxt, &smram.smram32);
+
+ /*
+ * If RSM fails and triggers shutdown, architecturally the shutdown
+ * occurs *before* the transition to guest mode. But due to KVM's
+ * flawed handling of RSM to L2 (see above), the vCPU may already be
+ * in_guest_mode(). Force the vCPU out of guest mode before delivering
+ * the shutdown, so that L1 enters shutdown instead of seeing a VM-Exit
+ * that architecturally shouldn't be possible.
+ */
+ if (ret != X86EMUL_CONTINUE && is_guest_mode(vcpu))
+ kvm_leave_nested(vcpu);
+ return ret;
}
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 6f704c1037e5..d5314cb7dff4 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -1693,8 +1693,8 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
return -EINVAL;
ret = -ENOMEM;
- ctl = kzalloc(sizeof(*ctl), GFP_KERNEL_ACCOUNT);
- save = kzalloc(sizeof(*save), GFP_KERNEL_ACCOUNT);
+ ctl = kzalloc(sizeof(*ctl), GFP_KERNEL);
+ save = kzalloc(sizeof(*save), GFP_KERNEL);
if (!ctl || !save)
goto out_free;
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 5ab2c92c7331..9df3e1e5ae81 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -573,7 +573,7 @@ static void __svm_write_tsc_multiplier(u64 multiplier)
static __always_inline struct sev_es_save_area *sev_es_host_save_area(struct svm_cpu_data *sd)
{
- return page_address(sd->save_area) + 0x400;
+ return &sd->save_area->host_sev_es_save;
}
static inline void kvm_cpu_svm_disable(void)
@@ -592,14 +592,14 @@ static inline void kvm_cpu_svm_disable(void)
}
}
-static void svm_emergency_disable(void)
+static void svm_emergency_disable_virtualization_cpu(void)
{
kvm_rebooting = true;
kvm_cpu_svm_disable();
}
-static void svm_hardware_disable(void)
+static void svm_disable_virtualization_cpu(void)
{
/* Make sure we clean up behind us */
if (tsc_scaling)
@@ -610,7 +610,7 @@ static void svm_hardware_disable(void)
amd_pmu_disable_virt();
}
-static int svm_hardware_enable(void)
+static int svm_enable_virtualization_cpu(void)
{
struct svm_cpu_data *sd;
@@ -696,7 +696,7 @@ static void svm_cpu_uninit(int cpu)
return;
kfree(sd->sev_vmcbs);
- __free_page(sd->save_area);
+ __free_page(__sme_pa_to_page(sd->save_area_pa));
sd->save_area_pa = 0;
sd->save_area = NULL;
}
@@ -704,23 +704,24 @@ static void svm_cpu_uninit(int cpu)
static int svm_cpu_init(int cpu)
{
struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu);
+ struct page *save_area_page;
int ret = -ENOMEM;
memset(sd, 0, sizeof(struct svm_cpu_data));
- sd->save_area = snp_safe_alloc_page_node(cpu_to_node(cpu), GFP_KERNEL);
- if (!sd->save_area)
+ save_area_page = snp_safe_alloc_page_node(cpu_to_node(cpu), GFP_KERNEL);
+ if (!save_area_page)
return ret;
ret = sev_cpu_init(sd);
if (ret)
goto free_save_area;
- sd->save_area_pa = __sme_page_pa(sd->save_area);
+ sd->save_area = page_address(save_area_page);
+ sd->save_area_pa = __sme_page_pa(save_area_page);
return 0;
free_save_area:
- __free_page(sd->save_area);
- sd->save_area = NULL;
+ __free_page(save_area_page);
return ret;
}
@@ -1124,8 +1125,7 @@ static void svm_hardware_unsetup(void)
for_each_possible_cpu(cpu)
svm_cpu_uninit(cpu);
- __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT),
- get_order(IOPM_SIZE));
+ __free_pages(__sme_pa_to_page(iopm_base), get_order(IOPM_SIZE));
iopm_base = 0;
}
@@ -1301,7 +1301,7 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
if (!kvm_hlt_in_guest(vcpu->kvm))
svm_set_intercept(svm, INTERCEPT_HLT);
- control->iopm_base_pa = __sme_set(iopm_base);
+ control->iopm_base_pa = iopm_base;
control->msrpm_base_pa = __sme_set(__pa(svm->msrpm));
control->int_ctl = V_INTR_MASKING_MASK;
@@ -1503,7 +1503,7 @@ static void svm_vcpu_free(struct kvm_vcpu *vcpu)
sev_free_vcpu(vcpu);
- __free_page(pfn_to_page(__sme_clr(svm->vmcb01.pa) >> PAGE_SHIFT));
+ __free_page(__sme_pa_to_page(svm->vmcb01.pa));
__free_pages(virt_to_page(svm->msrpm), get_order(MSRPM_SIZE));
}
@@ -1533,7 +1533,7 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
* TSC_AUX is always virtualized for SEV-ES guests when the feature is
* available. The user return MSR support is not required in this case
* because TSC_AUX is restored on #VMEXIT from the host save area
- * (which has been initialized in svm_hardware_enable()).
+ * (which has been initialized in svm_enable_virtualization_cpu()).
*/
if (likely(tsc_aux_uret_slot >= 0) &&
(!boot_cpu_has(X86_FEATURE_V_TSC_AUX) || !sev_es_guest(vcpu->kvm)))
@@ -2825,17 +2825,17 @@ static int efer_trap(struct kvm_vcpu *vcpu)
return kvm_complete_insn_gp(vcpu, ret);
}
-static int svm_get_msr_feature(struct kvm_msr_entry *msr)
+static int svm_get_feature_msr(u32 msr, u64 *data)
{
- msr->data = 0;
+ *data = 0;
- switch (msr->index) {
+ switch (msr) {
case MSR_AMD64_DE_CFG:
if (cpu_feature_enabled(X86_FEATURE_LFENCE_RDTSC))
- msr->data |= MSR_AMD64_DE_CFG_LFENCE_SERIALIZE;
+ *data |= MSR_AMD64_DE_CFG_LFENCE_SERIALIZE;
break;
default:
- return KVM_MSR_RET_INVALID;
+ return KVM_MSR_RET_UNSUPPORTED;
}
return 0;
@@ -3144,7 +3144,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
* feature is available. The user return MSR support is not
* required in this case because TSC_AUX is restored on #VMEXIT
* from the host save area (which has been initialized in
- * svm_hardware_enable()).
+ * svm_enable_virtualization_cpu()).
*/
if (boot_cpu_has(X86_FEATURE_V_TSC_AUX) && sev_es_guest(vcpu->kvm))
break;
@@ -3191,18 +3191,21 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
kvm_pr_unimpl_wrmsr(vcpu, ecx, data);
break;
case MSR_AMD64_DE_CFG: {
- struct kvm_msr_entry msr_entry;
+ u64 supported_de_cfg;
- msr_entry.index = msr->index;
- if (svm_get_msr_feature(&msr_entry))
+ if (svm_get_feature_msr(ecx, &supported_de_cfg))
return 1;
- /* Check the supported bits */
- if (data & ~msr_entry.data)
+ if (data & ~supported_de_cfg)
return 1;
- /* Don't allow the guest to change a bit, #GP */
- if (!msr->host_initiated && (data ^ msr_entry.data))
+ /*
+ * Don't let the guest change the host-programmed value. The
+ * MSR is very model specific, i.e. contains multiple bits that
+ * are completely unknown to KVM, and the one bit known to KVM
+ * is simply a reflection of hardware capabilities.
+ */
+ if (!msr->host_initiated && data != svm->msr_decfg)
return 1;
svm->msr_decfg = data;
@@ -4156,12 +4159,21 @@ static int svm_vcpu_pre_run(struct kvm_vcpu *vcpu)
static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
if (is_guest_mode(vcpu))
return EXIT_FASTPATH_NONE;
- if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR &&
- to_svm(vcpu)->vmcb->control.exit_info_1)
+ switch (svm->vmcb->control.exit_code) {
+ case SVM_EXIT_MSR:
+ if (!svm->vmcb->control.exit_info_1)
+ break;
return handle_fastpath_set_msr_irqoff(vcpu);
+ case SVM_EXIT_HLT:
+ return handle_fastpath_hlt(vcpu);
+ default:
+ break;
+ }
return EXIT_FASTPATH_NONE;
}
@@ -4992,8 +5004,9 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.check_processor_compatibility = svm_check_processor_compat,
.hardware_unsetup = svm_hardware_unsetup,
- .hardware_enable = svm_hardware_enable,
- .hardware_disable = svm_hardware_disable,
+ .enable_virtualization_cpu = svm_enable_virtualization_cpu,
+ .disable_virtualization_cpu = svm_disable_virtualization_cpu,
+ .emergency_disable_virtualization_cpu = svm_emergency_disable_virtualization_cpu,
.has_emulated_msr = svm_has_emulated_msr,
.vcpu_create = svm_vcpu_create,
@@ -5011,7 +5024,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.vcpu_unblocking = avic_vcpu_unblocking,
.update_exception_bitmap = svm_update_exception_bitmap,
- .get_msr_feature = svm_get_msr_feature,
+ .get_feature_msr = svm_get_feature_msr,
.get_msr = svm_get_msr,
.set_msr = svm_set_msr,
.get_segment_base = svm_get_segment_base,
@@ -5062,6 +5075,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.enable_nmi_window = svm_enable_nmi_window,
.enable_irq_window = svm_enable_irq_window,
.update_cr8_intercept = svm_update_cr8_intercept,
+
+ .x2apic_icr_is_split = true,
.set_virtual_apic_mode = avic_refresh_virtual_apic_mode,
.refresh_apicv_exec_ctrl = avic_refresh_apicv_exec_ctrl,
.apicv_post_state_restore = avic_apicv_post_state_restore,
@@ -5266,7 +5281,7 @@ static __init int svm_hardware_setup(void)
iopm_va = page_address(iopm_pages);
memset(iopm_va, 0xff, PAGE_SIZE * (1 << order));
- iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;
+ iopm_base = __sme_page_pa(iopm_pages);
init_msrpm_offsets();
@@ -5425,8 +5440,6 @@ static struct kvm_x86_init_ops svm_init_ops __initdata = {
static void __svm_exit(void)
{
kvm_x86_vendor_exit();
-
- cpu_emergency_unregister_virt_callback(svm_emergency_disable);
}
static int __init svm_init(void)
@@ -5442,8 +5455,6 @@ static int __init svm_init(void)
if (r)
return r;
- cpu_emergency_register_virt_callback(svm_emergency_disable);
-
/*
* Common KVM initialization _must_ come last, after this, /dev/kvm is
* exposed to userspace!
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 76107c7d0595..43fa6a16eb19 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -25,7 +25,21 @@
#include "cpuid.h"
#include "kvm_cache_regs.h"
-#define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)
+/*
+ * Helpers to convert to/from physical addresses for pages whose address is
+ * consumed directly by hardware. Even though it's a physical address, SVM
+ * often restricts the address to the natural width, hence 'unsigned long'
+ * instead of 'hpa_t'.
+ */
+static inline unsigned long __sme_page_pa(struct page *page)
+{
+ return __sme_set(page_to_pfn(page) << PAGE_SHIFT);
+}
+
+static inline struct page *__sme_pa_to_page(unsigned long pa)
+{
+ return pfn_to_page(__sme_clr(pa) >> PAGE_SHIFT);
+}
#define IOPM_SIZE PAGE_SIZE * 3
#define MSRPM_SIZE PAGE_SIZE * 2
@@ -321,7 +335,7 @@ struct svm_cpu_data {
u32 next_asid;
u32 min_asid;
- struct page *save_area;
+ struct vmcb *save_area;
unsigned long save_area_pa;
struct vmcb *current_vmcb;
diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S
index a0c8eb37d3e1..2ed80aea3bb1 100644
--- a/arch/x86/kvm/svm/vmenter.S
+++ b/arch/x86/kvm/svm/vmenter.S
@@ -209,10 +209,8 @@ SYM_FUNC_START(__svm_vcpu_run)
7: vmload %_ASM_AX
8:
-#ifdef CONFIG_MITIGATION_RETPOLINE
/* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
- FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
-#endif
+ FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT
/* Clobbers RAX, RCX, RDX. */
RESTORE_HOST_SPEC_CTRL
@@ -348,10 +346,8 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
2: cli
-#ifdef CONFIG_MITIGATION_RETPOLINE
/* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
- FILL_RETURN_BUFFER %rax, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
-#endif
+ FILL_RETURN_BUFFER %rax, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT
/* Clobbers RAX, RCX, RDX, consumes RDI (@svm) and RSI (@spec_ctrl_intercepted). */
RESTORE_HOST_SPEC_CTRL
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 41a4533f9989..cb6588238f46 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -54,9 +54,7 @@ struct nested_vmx_msrs {
};
struct vmcs_config {
- int size;
- u32 basic_cap;
- u32 revision_id;
+ u64 basic;
u32 pin_based_exec_ctrl;
u32 cpu_based_exec_ctrl;
u32 cpu_based_2nd_exec_ctrl;
@@ -76,7 +74,7 @@ extern struct vmx_capability vmx_capability __ro_after_init;
static inline bool cpu_has_vmx_basic_inout(void)
{
- return (((u64)vmcs_config.basic_cap << 32) & VMX_BASIC_INOUT);
+ return vmcs_config.basic & VMX_BASIC_INOUT;
}
static inline bool cpu_has_virtual_nmis(void)
@@ -225,7 +223,7 @@ static inline bool cpu_has_vmx_vmfunc(void)
static inline bool cpu_has_vmx_shadow_vmcs(void)
{
/* check if the cpu supports writing r/o exit information fields */
- if (!(vmcs_config.misc & MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS))
+ if (!(vmcs_config.misc & VMX_MISC_VMWRITE_SHADOW_RO_FIELDS))
return false;
return vmcs_config.cpu_based_2nd_exec_ctrl &
@@ -367,7 +365,7 @@ static inline bool cpu_has_vmx_invvpid_global(void)
static inline bool cpu_has_vmx_intel_pt(void)
{
- return (vmcs_config.misc & MSR_IA32_VMX_MISC_INTEL_PT) &&
+ return (vmcs_config.misc & VMX_MISC_INTEL_PT) &&
(vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_PT_USE_GPA) &&
(vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_RTIT_CTL);
}
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index 0bf35ebe8a1b..7668e2fb8043 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -23,8 +23,10 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
.hardware_unsetup = vmx_hardware_unsetup,
- .hardware_enable = vmx_hardware_enable,
- .hardware_disable = vmx_hardware_disable,
+ .enable_virtualization_cpu = vmx_enable_virtualization_cpu,
+ .disable_virtualization_cpu = vmx_disable_virtualization_cpu,
+ .emergency_disable_virtualization_cpu = vmx_emergency_disable_virtualization_cpu,
+
.has_emulated_msr = vmx_has_emulated_msr,
.vm_size = sizeof(struct kvm_vmx),
@@ -41,7 +43,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
.vcpu_put = vmx_vcpu_put,
.update_exception_bitmap = vmx_update_exception_bitmap,
- .get_msr_feature = vmx_get_msr_feature,
+ .get_feature_msr = vmx_get_feature_msr,
.get_msr = vmx_get_msr,
.set_msr = vmx_set_msr,
.get_segment_base = vmx_get_segment_base,
@@ -89,6 +91,8 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
.enable_nmi_window = vmx_enable_nmi_window,
.enable_irq_window = vmx_enable_irq_window,
.update_cr8_intercept = vmx_update_cr8_intercept,
+
+ .x2apic_icr_is_split = false,
.set_virtual_apic_mode = vmx_set_virtual_apic_mode,
.set_apic_access_page_addr = vmx_set_apic_access_page_addr,
.refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl,
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 2392a7ef254d..a8e7bc04d9bf 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -981,7 +981,7 @@ static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
__func__, i, e.index, e.reserved);
goto fail;
}
- if (kvm_set_msr(vcpu, e.index, e.value)) {
+ if (kvm_set_msr_with_filter(vcpu, e.index, e.value)) {
pr_debug_ratelimited(
"%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
__func__, i, e.index, e.value);
@@ -1017,7 +1017,7 @@ static bool nested_vmx_get_vmexit_msr_value(struct kvm_vcpu *vcpu,
}
}
- if (kvm_get_msr(vcpu, msr_index, data)) {
+ if (kvm_get_msr_with_filter(vcpu, msr_index, data)) {
pr_debug_ratelimited("%s cannot read MSR (0x%x)\n", __func__,
msr_index);
return false;
@@ -1112,9 +1112,9 @@ static void prepare_vmx_msr_autostore_list(struct kvm_vcpu *vcpu,
/*
* Emulated VMEntry does not fail here. Instead a less
* accurate value will be returned by
- * nested_vmx_get_vmexit_msr_value() using kvm_get_msr()
- * instead of reading the value from the vmcs02 VMExit
- * MSR-store area.
+ * nested_vmx_get_vmexit_msr_value() by reading KVM's
+ * internal MSR state instead of reading the value from
+ * the vmcs02 VMExit MSR-store area.
*/
pr_warn_ratelimited(
"Not enough msr entries in msr_autostore. Can't add msr %x\n",
@@ -1251,21 +1251,32 @@ static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask)
static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data)
{
- const u64 feature_and_reserved =
- /* feature (except bit 48; see below) */
- BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55) |
- /* reserved */
- BIT_ULL(31) | GENMASK_ULL(47, 45) | GENMASK_ULL(63, 56);
+ const u64 feature_bits = VMX_BASIC_DUAL_MONITOR_TREATMENT |
+ VMX_BASIC_INOUT |
+ VMX_BASIC_TRUE_CTLS;
+
+ const u64 reserved_bits = GENMASK_ULL(63, 56) |
+ GENMASK_ULL(47, 45) |
+ BIT_ULL(31);
+
u64 vmx_basic = vmcs_config.nested.basic;
- if (!is_bitwise_subset(vmx_basic, data, feature_and_reserved))
+ BUILD_BUG_ON(feature_bits & reserved_bits);
+
+ /*
+ * Except for 32BIT_PHYS_ADDR_ONLY, which is an anti-feature bit (has
+ * inverted polarity), the incoming value must not set feature bits or
+ * reserved bits that aren't allowed/supported by KVM. Fields, i.e.
+ * multi-bit values, are explicitly checked below.
+ */
+ if (!is_bitwise_subset(vmx_basic, data, feature_bits | reserved_bits))
return -EINVAL;
/*
* KVM does not emulate a version of VMX that constrains physical
* addresses of VMX structures (e.g. VMCS) to 32-bits.
*/
- if (data & BIT_ULL(48))
+ if (data & VMX_BASIC_32BIT_PHYS_ADDR_ONLY)
return -EINVAL;
if (vmx_basic_vmcs_revision_id(vmx_basic) !=
@@ -1334,16 +1345,29 @@ vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data)
{
- const u64 feature_and_reserved_bits =
- /* feature */
- BIT_ULL(5) | GENMASK_ULL(8, 6) | BIT_ULL(14) | BIT_ULL(15) |
- BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30) |
- /* reserved */
- GENMASK_ULL(13, 9) | BIT_ULL(31);
+ const u64 feature_bits = VMX_MISC_SAVE_EFER_LMA |
+ VMX_MISC_ACTIVITY_HLT |
+ VMX_MISC_ACTIVITY_SHUTDOWN |
+ VMX_MISC_ACTIVITY_WAIT_SIPI |
+ VMX_MISC_INTEL_PT |
+ VMX_MISC_RDMSR_IN_SMM |
+ VMX_MISC_VMWRITE_SHADOW_RO_FIELDS |
+ VMX_MISC_VMXOFF_BLOCK_SMI |
+ VMX_MISC_ZERO_LEN_INS;
+
+ const u64 reserved_bits = BIT_ULL(31) | GENMASK_ULL(13, 9);
+
u64 vmx_misc = vmx_control_msr(vmcs_config.nested.misc_low,
vmcs_config.nested.misc_high);
- if (!is_bitwise_subset(vmx_misc, data, feature_and_reserved_bits))
+ BUILD_BUG_ON(feature_bits & reserved_bits);
+
+ /*
+ * The incoming value must not set feature bits or reserved bits that
+ * aren't allowed/supported by KVM. Fields, i.e. multi-bit values, are
+ * explicitly checked below.
+ */
+ if (!is_bitwise_subset(vmx_misc, data, feature_bits | reserved_bits))
return -EINVAL;
if ((vmx->nested.msrs.pinbased_ctls_high &
@@ -2317,10 +2341,12 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0
/* Posted interrupts setting is only taken from vmcs12. */
vmx->nested.pi_pending = false;
- if (nested_cpu_has_posted_intr(vmcs12))
+ if (nested_cpu_has_posted_intr(vmcs12)) {
vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv;
- else
+ } else {
+ vmx->nested.posted_intr_nv = -1;
exec_control &= ~PIN_BASED_POSTED_INTR;
+ }
pin_controls_set(vmx, exec_control);
/*
@@ -2470,6 +2496,7 @@ static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2)) {
+
vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
@@ -2507,7 +2534,7 @@ static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
- vmx->segment_cache.bitmask = 0;
+ vmx_segment_cache_clear(vmx);
}
if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
@@ -4284,11 +4311,52 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
}
if (kvm_cpu_has_interrupt(vcpu) && !vmx_interrupt_blocked(vcpu)) {
+ int irq;
+
if (block_nested_events)
return -EBUSY;
if (!nested_exit_on_intr(vcpu))
goto no_vmexit;
- nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
+
+ if (!nested_exit_intr_ack_set(vcpu)) {
+ nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
+ return 0;
+ }
+
+ irq = kvm_cpu_get_extint(vcpu);
+ if (irq != -1) {
+ nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT,
+ INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR | irq, 0);
+ return 0;
+ }
+
+ irq = kvm_apic_has_interrupt(vcpu);
+ if (WARN_ON_ONCE(irq < 0))
+ goto no_vmexit;
+
+ /*
+ * If the IRQ is L2's PI notification vector, process posted
+ * interrupts for L2 instead of injecting VM-Exit, as the
+ * detection/morphing architecturally occurs when the IRQ is
+ * delivered to the CPU. Note, only interrupts that are routed
+ * through the local APIC trigger posted interrupt processing,
+ * and enabling posted interrupts requires ACK-on-exit.
+ */
+ if (irq == vmx->nested.posted_intr_nv) {
+ vmx->nested.pi_pending = true;
+ kvm_apic_clear_irr(vcpu, irq);
+ goto no_vmexit;
+ }
+
+ nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT,
+ INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR | irq, 0);
+
+ /*
+ * ACK the interrupt _after_ emulating VM-Exit, as the IRQ must
+ * be marked as in-service in vmcs01.GUEST_INTERRUPT_STATUS.SVI
+ * if APICv is active.
+ */
+ kvm_apic_ack_interrupt(vcpu, irq);
return 0;
}
@@ -4806,7 +4874,7 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
goto vmabort;
}
- if (kvm_set_msr(vcpu, h.index, h.value)) {
+ if (kvm_set_msr_with_filter(vcpu, h.index, h.value)) {
pr_debug_ratelimited(
"%s WRMSR failed (%u, 0x%x, 0x%llx)\n",
__func__, j, h.index, h.value);
@@ -4969,14 +5037,6 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
if (likely(!vmx->fail)) {
- if ((u16)vm_exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT &&
- nested_exit_intr_ack_set(vcpu)) {
- int irq = kvm_cpu_get_interrupt(vcpu);
- WARN_ON(irq < 0);
- vmcs12->vm_exit_intr_info = irq |
- INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR;
- }
-
if (vm_exit_reason != -1)
trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason,
vmcs12->exit_qualification,
@@ -7051,7 +7111,7 @@ static void nested_vmx_setup_misc_data(struct vmcs_config *vmcs_conf,
{
msrs->misc_low = (u32)vmcs_conf->misc & VMX_MISC_SAVE_EFER_LMA;
msrs->misc_low |=
- MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS |
+ VMX_MISC_VMWRITE_SHADOW_RO_FIELDS |
VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE |
VMX_MISC_ACTIVITY_HLT |
VMX_MISC_ACTIVITY_WAIT_SIPI;
@@ -7066,12 +7126,10 @@ static void nested_vmx_setup_basic(struct nested_vmx_msrs *msrs)
* guest, and the VMCS structure we give it - not about the
* VMX support of the underlying hardware.
*/
- msrs->basic =
- VMCS12_REVISION |
- VMX_BASIC_TRUE_CTLS |
- ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) |
- (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT);
+ msrs->basic = vmx_basic_encode_vmcs_info(VMCS12_REVISION, VMCS12_SIZE,
+ X86_MEMTYPE_WB);
+ msrs->basic |= VMX_BASIC_TRUE_CTLS;
if (cpu_has_vmx_basic_inout())
msrs->basic |= VMX_BASIC_INOUT;
}
diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h
index cce4e2aa30fb..2c296b6abb8c 100644
--- a/arch/x86/kvm/vmx/nested.h
+++ b/arch/x86/kvm/vmx/nested.h
@@ -39,11 +39,17 @@ bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port,
static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
{
+ lockdep_assert_once(lockdep_is_held(&vcpu->mutex) ||
+ !refcount_read(&vcpu->kvm->users_count));
+
return to_vmx(vcpu)->nested.cached_vmcs12;
}
static inline struct vmcs12 *get_shadow_vmcs12(struct kvm_vcpu *vcpu)
{
+ lockdep_assert_once(lockdep_is_held(&vcpu->mutex) ||
+ !refcount_read(&vcpu->kvm->users_count));
+
return to_vmx(vcpu)->nested.cached_shadow_vmcs12;
}
@@ -109,7 +115,7 @@ static inline unsigned nested_cpu_vmx_misc_cr3_count(struct kvm_vcpu *vcpu)
static inline bool nested_cpu_has_vmwrite_any_field(struct kvm_vcpu *vcpu)
{
return to_vmx(vcpu)->nested.msrs.misc_low &
- MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS;
+ VMX_MISC_VMWRITE_SHADOW_RO_FIELDS;
}
static inline bool nested_cpu_has_zero_length_injection(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/vmx/sgx.c b/arch/x86/kvm/vmx/sgx.c
index 6fef01e0536e..a3c3d2a51f47 100644
--- a/arch/x86/kvm/vmx/sgx.c
+++ b/arch/x86/kvm/vmx/sgx.c
@@ -274,7 +274,7 @@ static int handle_encls_ecreate(struct kvm_vcpu *vcpu)
* simultaneously set SGX_ATTR_PROVISIONKEY to bypass the check to
* enforce restriction of access to the PROVISIONKEY.
*/
- contents = (struct sgx_secs *)__get_free_page(GFP_KERNEL_ACCOUNT);
+ contents = (struct sgx_secs *)__get_free_page(GFP_KERNEL);
if (!contents)
return -ENOMEM;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 733a0c45d1a6..1a4438358c5e 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -525,10 +525,6 @@ static const struct kvm_vmx_segment_field {
VMX_SEGMENT_FIELD(LDTR),
};
-static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
-{
- vmx->segment_cache.bitmask = 0;
-}
static unsigned long host_idt_base;
@@ -755,7 +751,7 @@ fault:
return -EIO;
}
-static void vmx_emergency_disable(void)
+void vmx_emergency_disable_virtualization_cpu(void)
{
int cpu = raw_smp_processor_id();
struct loaded_vmcs *v;
@@ -1998,15 +1994,15 @@ static inline bool is_vmx_feature_control_msr_valid(struct vcpu_vmx *vmx,
return !(msr->data & ~valid_bits);
}
-int vmx_get_msr_feature(struct kvm_msr_entry *msr)
+int vmx_get_feature_msr(u32 msr, u64 *data)
{
- switch (msr->index) {
+ switch (msr) {
case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
if (!nested)
return 1;
- return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data);
+ return vmx_get_vmx_msr(&vmcs_config.nested, msr, data);
default:
- return KVM_MSR_RET_INVALID;
+ return KVM_MSR_RET_UNSUPPORTED;
}
}
@@ -2605,13 +2601,13 @@ static u64 adjust_vmx_controls64(u64 ctl_opt, u32 msr)
static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
struct vmx_capability *vmx_cap)
{
- u32 vmx_msr_low, vmx_msr_high;
u32 _pin_based_exec_control = 0;
u32 _cpu_based_exec_control = 0;
u32 _cpu_based_2nd_exec_control = 0;
u64 _cpu_based_3rd_exec_control = 0;
u32 _vmexit_control = 0;
u32 _vmentry_control = 0;
+ u64 basic_msr;
u64 misc_msr;
int i;
@@ -2734,29 +2730,29 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
_vmexit_control &= ~x_ctrl;
}
- rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high);
+ rdmsrl(MSR_IA32_VMX_BASIC, basic_msr);
/* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */
- if ((vmx_msr_high & 0x1fff) > PAGE_SIZE)
+ if (vmx_basic_vmcs_size(basic_msr) > PAGE_SIZE)
return -EIO;
#ifdef CONFIG_X86_64
- /* IA-32 SDM Vol 3B: 64-bit CPUs always have VMX_BASIC_MSR[48]==0. */
- if (vmx_msr_high & (1u<<16))
+ /*
+ * KVM expects to be able to shove all legal physical addresses into
+ * VMCS fields for 64-bit kernels, and per the SDM, "This bit is always
+ * 0 for processors that support Intel 64 architecture".
+ */
+ if (basic_msr & VMX_BASIC_32BIT_PHYS_ADDR_ONLY)
return -EIO;
#endif
/* Require Write-Back (WB) memory type for VMCS accesses. */
- if (((vmx_msr_high >> 18) & 15) != 6)
+ if (vmx_basic_vmcs_mem_type(basic_msr) != X86_MEMTYPE_WB)
return -EIO;
rdmsrl(MSR_IA32_VMX_MISC, misc_msr);
- vmcs_conf->size = vmx_msr_high & 0x1fff;
- vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff;
-
- vmcs_conf->revision_id = vmx_msr_low;
-
+ vmcs_conf->basic = basic_msr;
vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control;
vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control;
vmcs_conf->cpu_based_2nd_exec_ctrl = _cpu_based_2nd_exec_control;
@@ -2844,7 +2840,7 @@ fault:
return -EFAULT;
}
-int vmx_hardware_enable(void)
+int vmx_enable_virtualization_cpu(void)
{
int cpu = raw_smp_processor_id();
u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
@@ -2881,7 +2877,7 @@ static void vmclear_local_loaded_vmcss(void)
__loaded_vmcs_clear(v);
}
-void vmx_hardware_disable(void)
+void vmx_disable_virtualization_cpu(void)
{
vmclear_local_loaded_vmcss();
@@ -2903,13 +2899,13 @@ struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags)
if (!pages)
return NULL;
vmcs = page_address(pages);
- memset(vmcs, 0, vmcs_config.size);
+ memset(vmcs, 0, vmx_basic_vmcs_size(vmcs_config.basic));
/* KVM supports Enlightened VMCS v1 only */
if (kvm_is_using_evmcs())
vmcs->hdr.revision_id = KVM_EVMCS_VERSION;
else
- vmcs->hdr.revision_id = vmcs_config.revision_id;
+ vmcs->hdr.revision_id = vmx_basic_vmcs_revision_id(vmcs_config.basic);
if (shadow)
vmcs->hdr.shadow_vmcs = 1;
@@ -3002,7 +2998,7 @@ static __init int alloc_kvm_area(void)
* physical CPU.
*/
if (kvm_is_using_evmcs())
- vmcs->hdr.revision_id = vmcs_config.revision_id;
+ vmcs->hdr.revision_id = vmx_basic_vmcs_revision_id(vmcs_config.basic);
per_cpu(vmxarea, cpu) = vmcs;
}
@@ -4219,6 +4215,13 @@ static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ /*
+ * DO NOT query the vCPU's vmcs12, as vmcs12 is dynamically allocated
+ * and freed, and must not be accessed outside of vcpu->mutex. The
+ * vCPU's cached PI NV is valid if and only if posted interrupts
+ * enabled in its vmcs12, i.e. checking the vector also checks that
+ * L1 has enabled posted interrupts for L2.
+ */
if (is_guest_mode(vcpu) &&
vector == vmx->nested.posted_intr_nv) {
/*
@@ -5804,8 +5807,9 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
error_code |= (exit_qualification & EPT_VIOLATION_RWX_MASK)
? PFERR_PRESENT_MASK : 0;
- error_code |= (exit_qualification & EPT_VIOLATION_GVA_TRANSLATED) != 0 ?
- PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK;
+ if (error_code & EPT_VIOLATION_GVA_IS_VALID)
+ error_code |= (exit_qualification & EPT_VIOLATION_GVA_TRANSLATED) ?
+ PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK;
/*
* Check that the GPA doesn't exceed physical memory limits, as that is
@@ -7265,6 +7269,8 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu,
return handle_fastpath_set_msr_irqoff(vcpu);
case EXIT_REASON_PREEMPTION_TIMER:
return handle_fastpath_preemption_timer(vcpu, force_immediate_exit);
+ case EXIT_REASON_HLT:
+ return handle_fastpath_hlt(vcpu);
default:
return EXIT_FASTPATH_NONE;
}
@@ -7965,6 +7971,7 @@ static __init void vmx_set_cpu_caps(void)
kvm_cpu_cap_clear(X86_FEATURE_SGX_LC);
kvm_cpu_cap_clear(X86_FEATURE_SGX1);
kvm_cpu_cap_clear(X86_FEATURE_SGX2);
+ kvm_cpu_cap_clear(X86_FEATURE_SGX_EDECCSSA);
}
if (vmx_umip_emulated())
@@ -8515,7 +8522,7 @@ __init int vmx_hardware_setup(void)
u64 use_timer_freq = 5000ULL * 1000 * 1000;
cpu_preemption_timer_multi =
- vmcs_config.misc & VMX_MISC_PREEMPTION_TIMER_RATE_MASK;
+ vmx_misc_preemption_timer_rate(vmcs_config.misc);
if (tsc_khz)
use_timer_freq = (u64)tsc_khz * 1000;
@@ -8582,8 +8589,6 @@ static void __vmx_exit(void)
{
allow_smaller_maxphyaddr = false;
- cpu_emergency_unregister_virt_callback(vmx_emergency_disable);
-
vmx_cleanup_l1d_flush();
}
@@ -8630,8 +8635,6 @@ static int __init vmx_init(void)
pi_init_cpu(cpu);
}
- cpu_emergency_register_virt_callback(vmx_emergency_disable);
-
vmx_check_vmcs12_offsets();
/*
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 42498fa63abb..2325f773a20b 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -17,10 +17,6 @@
#include "run_flags.h"
#include "../mmu.h"
-#define MSR_TYPE_R 1
-#define MSR_TYPE_W 2
-#define MSR_TYPE_RW 3
-
#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4))
#ifdef CONFIG_X86_64
@@ -756,4 +752,9 @@ static inline bool vmx_can_use_ipiv(struct kvm_vcpu *vcpu)
return lapic_in_kernel(vcpu) && enable_ipiv;
}
+static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
+{
+ vmx->segment_cache.bitmask = 0;
+}
+
#endif /* __KVM_X86_VMX_H */
diff --git a/arch/x86/kvm/vmx/vmx_onhyperv.h b/arch/x86/kvm/vmx/vmx_onhyperv.h
index eb48153bfd73..bba24ed99ee6 100644
--- a/arch/x86/kvm/vmx/vmx_onhyperv.h
+++ b/arch/x86/kvm/vmx/vmx_onhyperv.h
@@ -104,6 +104,14 @@ static inline void evmcs_load(u64 phys_addr)
struct hv_vp_assist_page *vp_ap =
hv_get_vp_assist_page(smp_processor_id());
+ /*
+ * When enabling eVMCS, KVM verifies that every CPU has a valid hv_vp_assist_page()
+ * and aborts enabling the feature otherwise. CPU onlining path is also checked in
+ * vmx_hardware_enable().
+ */
+ if (KVM_BUG_ON(!vp_ap, kvm_get_running_vcpu()->kvm))
+ return;
+
if (current_evmcs->hv_enlightenments_control.nested_flush_hypercall)
vp_ap->nested_control.features.directhypercall = 1;
vp_ap->current_nested_vmcs = phys_addr;
diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h
index 8060e5fc6dbd..93e020dc88f6 100644
--- a/arch/x86/kvm/vmx/vmx_ops.h
+++ b/arch/x86/kvm/vmx/vmx_ops.h
@@ -47,7 +47,7 @@ static __always_inline void vmcs_check16(unsigned long field)
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001,
"16-bit accessor invalid for 64-bit high field");
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000,
- "16-bit accessor invalid for 32-bit high field");
+ "16-bit accessor invalid for 32-bit field");
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000,
"16-bit accessor invalid for natural width field");
}
diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
index ce3221cd1d01..a55981c5216e 100644
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -13,8 +13,9 @@ extern struct kvm_x86_init_ops vt_init_ops __initdata;
void vmx_hardware_unsetup(void);
int vmx_check_processor_compat(void);
-int vmx_hardware_enable(void);
-void vmx_hardware_disable(void);
+int vmx_enable_virtualization_cpu(void);
+void vmx_disable_virtualization_cpu(void);
+void vmx_emergency_disable_virtualization_cpu(void);
int vmx_vm_init(struct kvm *kvm);
void vmx_vm_destroy(struct kvm *kvm);
int vmx_vcpu_precreate(struct kvm *kvm);
@@ -56,7 +57,7 @@ bool vmx_has_emulated_msr(struct kvm *kvm, u32 index);
void vmx_msr_filter_changed(struct kvm_vcpu *vcpu);
void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu);
void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu);
-int vmx_get_msr_feature(struct kvm_msr_entry *msr);
+int vmx_get_feature_msr(u32 msr, u64 *data);
int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg);
void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c983c8e434b8..83fe0a78146f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -305,24 +305,237 @@ const struct kvm_stats_header kvm_vcpu_stats_header = {
static struct kmem_cache *x86_emulator_cache;
/*
- * When called, it means the previous get/set msr reached an invalid msr.
- * Return true if we want to ignore/silent this failed msr access.
+ * The three MSR lists(msrs_to_save, emulated_msrs, msr_based_features) track
+ * the set of MSRs that KVM exposes to userspace through KVM_GET_MSRS,
+ * KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. msrs_to_save holds MSRs that
+ * require host support, i.e. should be probed via RDMSR. emulated_msrs holds
+ * MSRs that KVM emulates without strictly requiring host support.
+ * msr_based_features holds MSRs that enumerate features, i.e. are effectively
+ * CPUID leafs. Note, msr_based_features isn't mutually exclusive with
+ * msrs_to_save and emulated_msrs.
*/
-static bool kvm_msr_ignored_check(u32 msr, u64 data, bool write)
+
+static const u32 msrs_to_save_base[] = {
+ MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
+ MSR_STAR,
+#ifdef CONFIG_X86_64
+ MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
+#endif
+ MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
+ MSR_IA32_FEAT_CTL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
+ MSR_IA32_SPEC_CTRL, MSR_IA32_TSX_CTRL,
+ MSR_IA32_RTIT_CTL, MSR_IA32_RTIT_STATUS, MSR_IA32_RTIT_CR3_MATCH,
+ MSR_IA32_RTIT_OUTPUT_BASE, MSR_IA32_RTIT_OUTPUT_MASK,
+ MSR_IA32_RTIT_ADDR0_A, MSR_IA32_RTIT_ADDR0_B,
+ MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B,
+ MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B,
+ MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B,
+ MSR_IA32_UMWAIT_CONTROL,
+
+ MSR_IA32_XFD, MSR_IA32_XFD_ERR,
+};
+
+static const u32 msrs_to_save_pmu[] = {
+ MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1,
+ MSR_ARCH_PERFMON_FIXED_CTR0 + 2,
+ MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS,
+ MSR_CORE_PERF_GLOBAL_CTRL,
+ MSR_IA32_PEBS_ENABLE, MSR_IA32_DS_AREA, MSR_PEBS_DATA_CFG,
+
+ /* This part of MSRs should match KVM_MAX_NR_INTEL_GP_COUNTERS. */
+ MSR_ARCH_PERFMON_PERFCTR0, MSR_ARCH_PERFMON_PERFCTR1,
+ MSR_ARCH_PERFMON_PERFCTR0 + 2, MSR_ARCH_PERFMON_PERFCTR0 + 3,
+ MSR_ARCH_PERFMON_PERFCTR0 + 4, MSR_ARCH_PERFMON_PERFCTR0 + 5,
+ MSR_ARCH_PERFMON_PERFCTR0 + 6, MSR_ARCH_PERFMON_PERFCTR0 + 7,
+ MSR_ARCH_PERFMON_EVENTSEL0, MSR_ARCH_PERFMON_EVENTSEL1,
+ MSR_ARCH_PERFMON_EVENTSEL0 + 2, MSR_ARCH_PERFMON_EVENTSEL0 + 3,
+ MSR_ARCH_PERFMON_EVENTSEL0 + 4, MSR_ARCH_PERFMON_EVENTSEL0 + 5,
+ MSR_ARCH_PERFMON_EVENTSEL0 + 6, MSR_ARCH_PERFMON_EVENTSEL0 + 7,
+
+ MSR_K7_EVNTSEL0, MSR_K7_EVNTSEL1, MSR_K7_EVNTSEL2, MSR_K7_EVNTSEL3,
+ MSR_K7_PERFCTR0, MSR_K7_PERFCTR1, MSR_K7_PERFCTR2, MSR_K7_PERFCTR3,
+
+ /* This part of MSRs should match KVM_MAX_NR_AMD_GP_COUNTERS. */
+ MSR_F15H_PERF_CTL0, MSR_F15H_PERF_CTL1, MSR_F15H_PERF_CTL2,
+ MSR_F15H_PERF_CTL3, MSR_F15H_PERF_CTL4, MSR_F15H_PERF_CTL5,
+ MSR_F15H_PERF_CTR0, MSR_F15H_PERF_CTR1, MSR_F15H_PERF_CTR2,
+ MSR_F15H_PERF_CTR3, MSR_F15H_PERF_CTR4, MSR_F15H_PERF_CTR5,
+
+ MSR_AMD64_PERF_CNTR_GLOBAL_CTL,
+ MSR_AMD64_PERF_CNTR_GLOBAL_STATUS,
+ MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR,
+};
+
+static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_base) +
+ ARRAY_SIZE(msrs_to_save_pmu)];
+static unsigned num_msrs_to_save;
+
+static const u32 emulated_msrs_all[] = {
+ MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
+ MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
+
+#ifdef CONFIG_KVM_HYPERV
+ HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
+ HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
+ HV_X64_MSR_TSC_FREQUENCY, HV_X64_MSR_APIC_FREQUENCY,
+ HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2,
+ HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL,
+ HV_X64_MSR_RESET,
+ HV_X64_MSR_VP_INDEX,
+ HV_X64_MSR_VP_RUNTIME,
+ HV_X64_MSR_SCONTROL,
+ HV_X64_MSR_STIMER0_CONFIG,
+ HV_X64_MSR_VP_ASSIST_PAGE,
+ HV_X64_MSR_REENLIGHTENMENT_CONTROL, HV_X64_MSR_TSC_EMULATION_CONTROL,
+ HV_X64_MSR_TSC_EMULATION_STATUS, HV_X64_MSR_TSC_INVARIANT_CONTROL,
+ HV_X64_MSR_SYNDBG_OPTIONS,
+ HV_X64_MSR_SYNDBG_CONTROL, HV_X64_MSR_SYNDBG_STATUS,
+ HV_X64_MSR_SYNDBG_SEND_BUFFER, HV_X64_MSR_SYNDBG_RECV_BUFFER,
+ HV_X64_MSR_SYNDBG_PENDING_BUFFER,
+#endif
+
+ MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
+ MSR_KVM_PV_EOI_EN, MSR_KVM_ASYNC_PF_INT, MSR_KVM_ASYNC_PF_ACK,
+
+ MSR_IA32_TSC_ADJUST,
+ MSR_IA32_TSC_DEADLINE,
+ MSR_IA32_ARCH_CAPABILITIES,
+ MSR_IA32_PERF_CAPABILITIES,
+ MSR_IA32_MISC_ENABLE,
+ MSR_IA32_MCG_STATUS,
+ MSR_IA32_MCG_CTL,
+ MSR_IA32_MCG_EXT_CTL,
+ MSR_IA32_SMBASE,
+ MSR_SMI_COUNT,
+ MSR_PLATFORM_INFO,
+ MSR_MISC_FEATURES_ENABLES,
+ MSR_AMD64_VIRT_SPEC_CTRL,
+ MSR_AMD64_TSC_RATIO,
+ MSR_IA32_POWER_CTL,
+ MSR_IA32_UCODE_REV,
+
+ /*
+ * KVM always supports the "true" VMX control MSRs, even if the host
+ * does not. The VMX MSRs as a whole are considered "emulated" as KVM
+ * doesn't strictly require them to exist in the host (ignoring that
+ * KVM would refuse to load in the first place if the core set of MSRs
+ * aren't supported).
+ */
+ MSR_IA32_VMX_BASIC,
+ MSR_IA32_VMX_TRUE_PINBASED_CTLS,
+ MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
+ MSR_IA32_VMX_TRUE_EXIT_CTLS,
+ MSR_IA32_VMX_TRUE_ENTRY_CTLS,
+ MSR_IA32_VMX_MISC,
+ MSR_IA32_VMX_CR0_FIXED0,
+ MSR_IA32_VMX_CR4_FIXED0,
+ MSR_IA32_VMX_VMCS_ENUM,
+ MSR_IA32_VMX_PROCBASED_CTLS2,
+ MSR_IA32_VMX_EPT_VPID_CAP,
+ MSR_IA32_VMX_VMFUNC,
+
+ MSR_K7_HWCR,
+ MSR_KVM_POLL_CONTROL,
+};
+
+static u32 emulated_msrs[ARRAY_SIZE(emulated_msrs_all)];
+static unsigned num_emulated_msrs;
+
+/*
+ * List of MSRs that control the existence of MSR-based features, i.e. MSRs
+ * that are effectively CPUID leafs. VMX MSRs are also included in the set of
+ * feature MSRs, but are handled separately to allow expedited lookups.
+ */
+static const u32 msr_based_features_all_except_vmx[] = {
+ MSR_AMD64_DE_CFG,
+ MSR_IA32_UCODE_REV,
+ MSR_IA32_ARCH_CAPABILITIES,
+ MSR_IA32_PERF_CAPABILITIES,
+};
+
+static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all_except_vmx) +
+ (KVM_LAST_EMULATED_VMX_MSR - KVM_FIRST_EMULATED_VMX_MSR + 1)];
+static unsigned int num_msr_based_features;
+
+/*
+ * All feature MSRs except uCode revID, which tracks the currently loaded uCode
+ * patch, are immutable once the vCPU model is defined.
+ */
+static bool kvm_is_immutable_feature_msr(u32 msr)
{
- const char *op = write ? "wrmsr" : "rdmsr";
+ int i;
- if (ignore_msrs) {
- if (report_ignored_msrs)
- kvm_pr_unimpl("ignored %s: 0x%x data 0x%llx\n",
- op, msr, data);
- /* Mask the error */
+ if (msr >= KVM_FIRST_EMULATED_VMX_MSR && msr <= KVM_LAST_EMULATED_VMX_MSR)
return true;
- } else {
+
+ for (i = 0; i < ARRAY_SIZE(msr_based_features_all_except_vmx); i++) {
+ if (msr == msr_based_features_all_except_vmx[i])
+ return msr != MSR_IA32_UCODE_REV;
+ }
+
+ return false;
+}
+
+static bool kvm_is_advertised_msr(u32 msr_index)
+{
+ unsigned int i;
+
+ for (i = 0; i < num_msrs_to_save; i++) {
+ if (msrs_to_save[i] == msr_index)
+ return true;
+ }
+
+ for (i = 0; i < num_emulated_msrs; i++) {
+ if (emulated_msrs[i] == msr_index)
+ return true;
+ }
+
+ return false;
+}
+
+typedef int (*msr_access_t)(struct kvm_vcpu *vcpu, u32 index, u64 *data,
+ bool host_initiated);
+
+static __always_inline int kvm_do_msr_access(struct kvm_vcpu *vcpu, u32 msr,
+ u64 *data, bool host_initiated,
+ enum kvm_msr_access rw,
+ msr_access_t msr_access_fn)
+{
+ const char *op = rw == MSR_TYPE_W ? "wrmsr" : "rdmsr";
+ int ret;
+
+ BUILD_BUG_ON(rw != MSR_TYPE_R && rw != MSR_TYPE_W);
+
+ /*
+ * Zero the data on read failures to avoid leaking stack data to the
+ * guest and/or userspace, e.g. if the failure is ignored below.
+ */
+ ret = msr_access_fn(vcpu, msr, data, host_initiated);
+ if (ret && rw == MSR_TYPE_R)
+ *data = 0;
+
+ if (ret != KVM_MSR_RET_UNSUPPORTED)
+ return ret;
+
+ /*
+ * Userspace is allowed to read MSRs, and write '0' to MSRs, that KVM
+ * advertises to userspace, even if an MSR isn't fully supported.
+ * Simply check that @data is '0', which covers both the write '0' case
+ * and all reads (in which case @data is zeroed on failure; see above).
+ */
+ if (host_initiated && !*data && kvm_is_advertised_msr(msr))
+ return 0;
+
+ if (!ignore_msrs) {
kvm_debug_ratelimited("unhandled %s: 0x%x data 0x%llx\n",
- op, msr, data);
- return false;
+ op, msr, *data);
+ return ret;
}
+
+ if (report_ignored_msrs)
+ kvm_pr_unimpl("ignored %s: 0x%x data 0x%llx\n", op, msr, *data);
+
+ return 0;
}
static struct kmem_cache *kvm_alloc_emulator_cache(void)
@@ -355,7 +568,7 @@ static void kvm_on_user_return(struct user_return_notifier *urn)
/*
* Disabling irqs at this point since the following code could be
- * interrupted and executed through kvm_arch_hardware_disable()
+ * interrupted and executed through kvm_arch_disable_virtualization_cpu()
*/
local_irq_save(flags);
if (msrs->registered) {
@@ -413,8 +626,7 @@ EXPORT_SYMBOL_GPL(kvm_find_user_return_msr);
static void kvm_user_return_msr_cpu_online(void)
{
- unsigned int cpu = smp_processor_id();
- struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu);
+ struct kvm_user_return_msrs *msrs = this_cpu_ptr(user_return_msrs);
u64 value;
int i;
@@ -621,12 +833,6 @@ static void kvm_queue_exception_vmexit(struct kvm_vcpu *vcpu, unsigned int vecto
ex->payload = payload;
}
-/* Forcibly leave the nested mode in cases like a vCPU reset */
-static void kvm_leave_nested(struct kvm_vcpu *vcpu)
-{
- kvm_x86_ops.nested_ops->leave_nested(vcpu);
-}
-
static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
unsigned nr, bool has_error, u32 error_code,
bool has_payload, unsigned long payload, bool reinject)
@@ -1412,178 +1618,6 @@ int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu)
EXPORT_SYMBOL_GPL(kvm_emulate_rdpmc);
/*
- * The three MSR lists(msrs_to_save, emulated_msrs, msr_based_features) track
- * the set of MSRs that KVM exposes to userspace through KVM_GET_MSRS,
- * KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. msrs_to_save holds MSRs that
- * require host support, i.e. should be probed via RDMSR. emulated_msrs holds
- * MSRs that KVM emulates without strictly requiring host support.
- * msr_based_features holds MSRs that enumerate features, i.e. are effectively
- * CPUID leafs. Note, msr_based_features isn't mutually exclusive with
- * msrs_to_save and emulated_msrs.
- */
-
-static const u32 msrs_to_save_base[] = {
- MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
- MSR_STAR,
-#ifdef CONFIG_X86_64
- MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
-#endif
- MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
- MSR_IA32_FEAT_CTL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
- MSR_IA32_SPEC_CTRL, MSR_IA32_TSX_CTRL,
- MSR_IA32_RTIT_CTL, MSR_IA32_RTIT_STATUS, MSR_IA32_RTIT_CR3_MATCH,
- MSR_IA32_RTIT_OUTPUT_BASE, MSR_IA32_RTIT_OUTPUT_MASK,
- MSR_IA32_RTIT_ADDR0_A, MSR_IA32_RTIT_ADDR0_B,
- MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B,
- MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B,
- MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B,
- MSR_IA32_UMWAIT_CONTROL,
-
- MSR_IA32_XFD, MSR_IA32_XFD_ERR,
-};
-
-static const u32 msrs_to_save_pmu[] = {
- MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1,
- MSR_ARCH_PERFMON_FIXED_CTR0 + 2,
- MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS,
- MSR_CORE_PERF_GLOBAL_CTRL,
- MSR_IA32_PEBS_ENABLE, MSR_IA32_DS_AREA, MSR_PEBS_DATA_CFG,
-
- /* This part of MSRs should match KVM_MAX_NR_INTEL_GP_COUNTERS. */
- MSR_ARCH_PERFMON_PERFCTR0, MSR_ARCH_PERFMON_PERFCTR1,
- MSR_ARCH_PERFMON_PERFCTR0 + 2, MSR_ARCH_PERFMON_PERFCTR0 + 3,
- MSR_ARCH_PERFMON_PERFCTR0 + 4, MSR_ARCH_PERFMON_PERFCTR0 + 5,
- MSR_ARCH_PERFMON_PERFCTR0 + 6, MSR_ARCH_PERFMON_PERFCTR0 + 7,
- MSR_ARCH_PERFMON_EVENTSEL0, MSR_ARCH_PERFMON_EVENTSEL1,
- MSR_ARCH_PERFMON_EVENTSEL0 + 2, MSR_ARCH_PERFMON_EVENTSEL0 + 3,
- MSR_ARCH_PERFMON_EVENTSEL0 + 4, MSR_ARCH_PERFMON_EVENTSEL0 + 5,
- MSR_ARCH_PERFMON_EVENTSEL0 + 6, MSR_ARCH_PERFMON_EVENTSEL0 + 7,
-
- MSR_K7_EVNTSEL0, MSR_K7_EVNTSEL1, MSR_K7_EVNTSEL2, MSR_K7_EVNTSEL3,
- MSR_K7_PERFCTR0, MSR_K7_PERFCTR1, MSR_K7_PERFCTR2, MSR_K7_PERFCTR3,
-
- /* This part of MSRs should match KVM_MAX_NR_AMD_GP_COUNTERS. */
- MSR_F15H_PERF_CTL0, MSR_F15H_PERF_CTL1, MSR_F15H_PERF_CTL2,
- MSR_F15H_PERF_CTL3, MSR_F15H_PERF_CTL4, MSR_F15H_PERF_CTL5,
- MSR_F15H_PERF_CTR0, MSR_F15H_PERF_CTR1, MSR_F15H_PERF_CTR2,
- MSR_F15H_PERF_CTR3, MSR_F15H_PERF_CTR4, MSR_F15H_PERF_CTR5,
-
- MSR_AMD64_PERF_CNTR_GLOBAL_CTL,
- MSR_AMD64_PERF_CNTR_GLOBAL_STATUS,
- MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR,
-};
-
-static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_base) +
- ARRAY_SIZE(msrs_to_save_pmu)];
-static unsigned num_msrs_to_save;
-
-static const u32 emulated_msrs_all[] = {
- MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
- MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
-
-#ifdef CONFIG_KVM_HYPERV
- HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
- HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
- HV_X64_MSR_TSC_FREQUENCY, HV_X64_MSR_APIC_FREQUENCY,
- HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2,
- HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL,
- HV_X64_MSR_RESET,
- HV_X64_MSR_VP_INDEX,
- HV_X64_MSR_VP_RUNTIME,
- HV_X64_MSR_SCONTROL,
- HV_X64_MSR_STIMER0_CONFIG,
- HV_X64_MSR_VP_ASSIST_PAGE,
- HV_X64_MSR_REENLIGHTENMENT_CONTROL, HV_X64_MSR_TSC_EMULATION_CONTROL,
- HV_X64_MSR_TSC_EMULATION_STATUS, HV_X64_MSR_TSC_INVARIANT_CONTROL,
- HV_X64_MSR_SYNDBG_OPTIONS,
- HV_X64_MSR_SYNDBG_CONTROL, HV_X64_MSR_SYNDBG_STATUS,
- HV_X64_MSR_SYNDBG_SEND_BUFFER, HV_X64_MSR_SYNDBG_RECV_BUFFER,
- HV_X64_MSR_SYNDBG_PENDING_BUFFER,
-#endif
-
- MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
- MSR_KVM_PV_EOI_EN, MSR_KVM_ASYNC_PF_INT, MSR_KVM_ASYNC_PF_ACK,
-
- MSR_IA32_TSC_ADJUST,
- MSR_IA32_TSC_DEADLINE,
- MSR_IA32_ARCH_CAPABILITIES,
- MSR_IA32_PERF_CAPABILITIES,
- MSR_IA32_MISC_ENABLE,
- MSR_IA32_MCG_STATUS,
- MSR_IA32_MCG_CTL,
- MSR_IA32_MCG_EXT_CTL,
- MSR_IA32_SMBASE,
- MSR_SMI_COUNT,
- MSR_PLATFORM_INFO,
- MSR_MISC_FEATURES_ENABLES,
- MSR_AMD64_VIRT_SPEC_CTRL,
- MSR_AMD64_TSC_RATIO,
- MSR_IA32_POWER_CTL,
- MSR_IA32_UCODE_REV,
-
- /*
- * KVM always supports the "true" VMX control MSRs, even if the host
- * does not. The VMX MSRs as a whole are considered "emulated" as KVM
- * doesn't strictly require them to exist in the host (ignoring that
- * KVM would refuse to load in the first place if the core set of MSRs
- * aren't supported).
- */
- MSR_IA32_VMX_BASIC,
- MSR_IA32_VMX_TRUE_PINBASED_CTLS,
- MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
- MSR_IA32_VMX_TRUE_EXIT_CTLS,
- MSR_IA32_VMX_TRUE_ENTRY_CTLS,
- MSR_IA32_VMX_MISC,
- MSR_IA32_VMX_CR0_FIXED0,
- MSR_IA32_VMX_CR4_FIXED0,
- MSR_IA32_VMX_VMCS_ENUM,
- MSR_IA32_VMX_PROCBASED_CTLS2,
- MSR_IA32_VMX_EPT_VPID_CAP,
- MSR_IA32_VMX_VMFUNC,
-
- MSR_K7_HWCR,
- MSR_KVM_POLL_CONTROL,
-};
-
-static u32 emulated_msrs[ARRAY_SIZE(emulated_msrs_all)];
-static unsigned num_emulated_msrs;
-
-/*
- * List of MSRs that control the existence of MSR-based features, i.e. MSRs
- * that are effectively CPUID leafs. VMX MSRs are also included in the set of
- * feature MSRs, but are handled separately to allow expedited lookups.
- */
-static const u32 msr_based_features_all_except_vmx[] = {
- MSR_AMD64_DE_CFG,
- MSR_IA32_UCODE_REV,
- MSR_IA32_ARCH_CAPABILITIES,
- MSR_IA32_PERF_CAPABILITIES,
-};
-
-static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all_except_vmx) +
- (KVM_LAST_EMULATED_VMX_MSR - KVM_FIRST_EMULATED_VMX_MSR + 1)];
-static unsigned int num_msr_based_features;
-
-/*
- * All feature MSRs except uCode revID, which tracks the currently loaded uCode
- * patch, are immutable once the vCPU model is defined.
- */
-static bool kvm_is_immutable_feature_msr(u32 msr)
-{
- int i;
-
- if (msr >= KVM_FIRST_EMULATED_VMX_MSR && msr <= KVM_LAST_EMULATED_VMX_MSR)
- return true;
-
- for (i = 0; i < ARRAY_SIZE(msr_based_features_all_except_vmx); i++) {
- if (msr == msr_based_features_all_except_vmx[i])
- return msr != MSR_IA32_UCODE_REV;
- }
-
- return false;
-}
-
-/*
* Some IA32_ARCH_CAPABILITIES bits have dependencies on MSRs that KVM
* does not yet virtualize. These include:
* 10 - MISC_PACKAGE_CTRLS
@@ -1660,40 +1694,31 @@ static u64 kvm_get_arch_capabilities(void)
return data;
}
-static int kvm_get_msr_feature(struct kvm_msr_entry *msr)
+static int kvm_get_feature_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
+ bool host_initiated)
{
- switch (msr->index) {
+ WARN_ON_ONCE(!host_initiated);
+
+ switch (index) {
case MSR_IA32_ARCH_CAPABILITIES:
- msr->data = kvm_get_arch_capabilities();
+ *data = kvm_get_arch_capabilities();
break;
case MSR_IA32_PERF_CAPABILITIES:
- msr->data = kvm_caps.supported_perf_cap;
+ *data = kvm_caps.supported_perf_cap;
break;
case MSR_IA32_UCODE_REV:
- rdmsrl_safe(msr->index, &msr->data);
+ rdmsrl_safe(index, data);
break;
default:
- return kvm_x86_call(get_msr_feature)(msr);
+ return kvm_x86_call(get_feature_msr)(index, data);
}
return 0;
}
-static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
+static int do_get_feature_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
{
- struct kvm_msr_entry msr;
- int r;
-
- /* Unconditionally clear the output for simplicity */
- msr.data = 0;
- msr.index = index;
- r = kvm_get_msr_feature(&msr);
-
- if (r == KVM_MSR_RET_INVALID && kvm_msr_ignored_check(index, 0, false))
- r = 0;
-
- *data = msr.data;
-
- return r;
+ return kvm_do_msr_access(vcpu, index, data, true, MSR_TYPE_R,
+ kvm_get_feature_msr);
}
static bool __kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
@@ -1880,16 +1905,17 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
return kvm_x86_call(set_msr)(vcpu, &msr);
}
+static int _kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
+ bool host_initiated)
+{
+ return __kvm_set_msr(vcpu, index, *data, host_initiated);
+}
+
static int kvm_set_msr_ignored_check(struct kvm_vcpu *vcpu,
u32 index, u64 data, bool host_initiated)
{
- int ret = __kvm_set_msr(vcpu, index, data, host_initiated);
-
- if (ret == KVM_MSR_RET_INVALID)
- if (kvm_msr_ignored_check(index, data, true))
- ret = 0;
-
- return ret;
+ return kvm_do_msr_access(vcpu, index, &data, host_initiated, MSR_TYPE_W,
+ _kvm_set_msr);
}
/*
@@ -1928,31 +1954,25 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu,
u32 index, u64 *data, bool host_initiated)
{
- int ret = __kvm_get_msr(vcpu, index, data, host_initiated);
-
- if (ret == KVM_MSR_RET_INVALID) {
- /* Unconditionally clear *data for simplicity */
- *data = 0;
- if (kvm_msr_ignored_check(index, 0, false))
- ret = 0;
- }
-
- return ret;
+ return kvm_do_msr_access(vcpu, index, data, host_initiated, MSR_TYPE_R,
+ __kvm_get_msr);
}
-static int kvm_get_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 *data)
+int kvm_get_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 *data)
{
if (!kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ))
return KVM_MSR_RET_FILTERED;
return kvm_get_msr_ignored_check(vcpu, index, data, false);
}
+EXPORT_SYMBOL_GPL(kvm_get_msr_with_filter);
-static int kvm_set_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 data)
+int kvm_set_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 data)
{
if (!kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_WRITE))
return KVM_MSR_RET_FILTERED;
return kvm_set_msr_ignored_check(vcpu, index, data, false);
}
+EXPORT_SYMBOL_GPL(kvm_set_msr_with_filter);
int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data)
{
@@ -1999,7 +2019,7 @@ static int complete_fast_rdmsr(struct kvm_vcpu *vcpu)
static u64 kvm_msr_reason(int r)
{
switch (r) {
- case KVM_MSR_RET_INVALID:
+ case KVM_MSR_RET_UNSUPPORTED:
return KVM_MSR_EXIT_REASON_UNKNOWN;
case KVM_MSR_RET_FILTERED:
return KVM_MSR_EXIT_REASON_FILTER;
@@ -2162,31 +2182,34 @@ fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu)
{
u32 msr = kvm_rcx_read(vcpu);
u64 data;
- fastpath_t ret = EXIT_FASTPATH_NONE;
+ fastpath_t ret;
+ bool handled;
kvm_vcpu_srcu_read_lock(vcpu);
switch (msr) {
case APIC_BASE_MSR + (APIC_ICR >> 4):
data = kvm_read_edx_eax(vcpu);
- if (!handle_fastpath_set_x2apic_icr_irqoff(vcpu, data)) {
- kvm_skip_emulated_instruction(vcpu);
- ret = EXIT_FASTPATH_EXIT_HANDLED;
- }
+ handled = !handle_fastpath_set_x2apic_icr_irqoff(vcpu, data);
break;
case MSR_IA32_TSC_DEADLINE:
data = kvm_read_edx_eax(vcpu);
- if (!handle_fastpath_set_tscdeadline(vcpu, data)) {
- kvm_skip_emulated_instruction(vcpu);
- ret = EXIT_FASTPATH_REENTER_GUEST;
- }
+ handled = !handle_fastpath_set_tscdeadline(vcpu, data);
break;
default:
+ handled = false;
break;
}
- if (ret != EXIT_FASTPATH_NONE)
+ if (handled) {
+ if (!kvm_skip_emulated_instruction(vcpu))
+ ret = EXIT_FASTPATH_EXIT_USERSPACE;
+ else
+ ret = EXIT_FASTPATH_REENTER_GUEST;
trace_kvm_msr_write(msr, data);
+ } else {
+ ret = EXIT_FASTPATH_NONE;
+ }
kvm_vcpu_srcu_read_unlock(vcpu);
@@ -3746,18 +3769,6 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
}
-static bool kvm_is_msr_to_save(u32 msr_index)
-{
- unsigned int i;
-
- for (i = 0; i < num_msrs_to_save; i++) {
- if (msrs_to_save[i] == msr_index)
- return true;
- }
-
- return false;
-}
-
int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
u32 msr = msr_info->index;
@@ -4139,15 +4150,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (kvm_pmu_is_valid_msr(vcpu, msr))
return kvm_pmu_set_msr(vcpu, msr_info);
- /*
- * Userspace is allowed to write '0' to MSRs that KVM reports
- * as to-be-saved, even if an MSRs isn't fully supported.
- */
- if (msr_info->host_initiated && !data &&
- kvm_is_msr_to_save(msr))
- break;
-
- return KVM_MSR_RET_INVALID;
+ return KVM_MSR_RET_UNSUPPORTED;
}
return 0;
}
@@ -4498,17 +4501,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
return kvm_pmu_get_msr(vcpu, msr_info);
- /*
- * Userspace is allowed to read MSRs that KVM reports as
- * to-be-saved, even if an MSR isn't fully supported.
- */
- if (msr_info->host_initiated &&
- kvm_is_msr_to_save(msr_info->index)) {
- msr_info->data = 0;
- break;
- }
-
- return KVM_MSR_RET_INVALID;
+ return KVM_MSR_RET_UNSUPPORTED;
}
return 0;
}
@@ -4946,7 +4939,7 @@ long kvm_arch_dev_ioctl(struct file *filp,
break;
}
case KVM_GET_MSRS:
- r = msr_io(NULL, argp, do_get_msr_feature, 1);
+ r = msr_io(NULL, argp, do_get_feature_msr, 1);
break;
#ifdef CONFIG_KVM_HYPERV
case KVM_GET_SUPPORTED_HV_CPUID:
@@ -7383,11 +7376,9 @@ out:
static void kvm_probe_feature_msr(u32 msr_index)
{
- struct kvm_msr_entry msr = {
- .index = msr_index,
- };
+ u64 data;
- if (kvm_get_msr_feature(&msr))
+ if (kvm_get_feature_msr(NULL, msr_index, &data, true))
return;
msr_based_features[num_msr_based_features++] = msr_index;
@@ -8865,60 +8856,13 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
return 1;
}
-static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
- int emulation_type)
+static bool kvm_unprotect_and_retry_on_failure(struct kvm_vcpu *vcpu,
+ gpa_t cr2_or_gpa,
+ int emulation_type)
{
- gpa_t gpa = cr2_or_gpa;
- kvm_pfn_t pfn;
-
if (!(emulation_type & EMULTYPE_ALLOW_RETRY_PF))
return false;
- if (WARN_ON_ONCE(is_guest_mode(vcpu)) ||
- WARN_ON_ONCE(!(emulation_type & EMULTYPE_PF)))
- return false;
-
- if (!vcpu->arch.mmu->root_role.direct) {
- /*
- * Write permission should be allowed since only
- * write access need to be emulated.
- */
- gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL);
-
- /*
- * If the mapping is invalid in guest, let cpu retry
- * it to generate fault.
- */
- if (gpa == INVALID_GPA)
- return true;
- }
-
- /*
- * Do not retry the unhandleable instruction if it faults on the
- * readonly host memory, otherwise it will goto a infinite loop:
- * retry instruction -> write #PF -> emulation fail -> retry
- * instruction -> ...
- */
- pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
-
- /*
- * If the instruction failed on the error pfn, it can not be fixed,
- * report the error to userspace.
- */
- if (is_error_noslot_pfn(pfn))
- return false;
-
- kvm_release_pfn_clean(pfn);
-
- /*
- * If emulation may have been triggered by a write to a shadowed page
- * table, unprotect the gfn (zap any relevant SPTEs) and re-enter the
- * guest to let the CPU re-execute the instruction in the hope that the
- * CPU can cleanly execute the instruction that KVM failed to emulate.
- */
- if (vcpu->kvm->arch.indirect_shadow_pages)
- kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
-
/*
* If the failed instruction faulted on an access to page tables that
* are used to translate any part of the instruction, KVM can't resolve
@@ -8929,54 +8873,24 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
* then zap the SPTE to unprotect the gfn, and then do it all over
* again. Report the error to userspace.
*/
- return !(emulation_type & EMULTYPE_WRITE_PF_TO_SP);
-}
-
-static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
- gpa_t cr2_or_gpa, int emulation_type)
-{
- struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
- unsigned long last_retry_eip, last_retry_addr, gpa = cr2_or_gpa;
-
- last_retry_eip = vcpu->arch.last_retry_eip;
- last_retry_addr = vcpu->arch.last_retry_addr;
+ if (emulation_type & EMULTYPE_WRITE_PF_TO_SP)
+ return false;
/*
- * If the emulation is caused by #PF and it is non-page_table
- * writing instruction, it means the VM-EXIT is caused by shadow
- * page protected, we can zap the shadow page and retry this
- * instruction directly.
- *
- * Note: if the guest uses a non-page-table modifying instruction
- * on the PDE that points to the instruction, then we will unmap
- * the instruction and go to an infinite loop. So, we cache the
- * last retried eip and the last fault address, if we meet the eip
- * and the address again, we can break out of the potential infinite
- * loop.
+ * If emulation may have been triggered by a write to a shadowed page
+ * table, unprotect the gfn (zap any relevant SPTEs) and re-enter the
+ * guest to let the CPU re-execute the instruction in the hope that the
+ * CPU can cleanly execute the instruction that KVM failed to emulate.
*/
- vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0;
-
- if (!(emulation_type & EMULTYPE_ALLOW_RETRY_PF))
- return false;
-
- if (WARN_ON_ONCE(is_guest_mode(vcpu)) ||
- WARN_ON_ONCE(!(emulation_type & EMULTYPE_PF)))
- return false;
-
- if (x86_page_table_writing_insn(ctxt))
- return false;
-
- if (ctxt->eip == last_retry_eip && last_retry_addr == cr2_or_gpa)
- return false;
-
- vcpu->arch.last_retry_eip = ctxt->eip;
- vcpu->arch.last_retry_addr = cr2_or_gpa;
-
- if (!vcpu->arch.mmu->root_role.direct)
- gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL);
-
- kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
+ __kvm_mmu_unprotect_gfn_and_retry(vcpu, cr2_or_gpa, true);
+ /*
+ * Retry even if _this_ vCPU didn't unprotect the gfn, as it's possible
+ * all SPTEs were already zapped by a different task. The alternative
+ * is to report the error to userspace and likely terminate the guest,
+ * and the last_retry_{eip,addr} checks will prevent retrying the page
+ * fault indefinitely, i.e. there's nothing to lose by retrying.
+ */
return true;
}
@@ -9176,6 +9090,11 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
bool writeback = true;
+ if ((emulation_type & EMULTYPE_ALLOW_RETRY_PF) &&
+ (WARN_ON_ONCE(is_guest_mode(vcpu)) ||
+ WARN_ON_ONCE(!(emulation_type & EMULTYPE_PF))))
+ emulation_type &= ~EMULTYPE_ALLOW_RETRY_PF;
+
r = kvm_check_emulate_insn(vcpu, emulation_type, insn, insn_len);
if (r != X86EMUL_CONTINUE) {
if (r == X86EMUL_RETRY_INSTR || r == X86EMUL_PROPAGATE_FAULT)
@@ -9206,8 +9125,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
- if (reexecute_instruction(vcpu, cr2_or_gpa,
- emulation_type))
+ if (kvm_unprotect_and_retry_on_failure(vcpu, cr2_or_gpa,
+ emulation_type))
return 1;
if (ctxt->have_exception &&
@@ -9254,7 +9173,15 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
return 1;
}
- if (retry_instruction(ctxt, cr2_or_gpa, emulation_type))
+ /*
+ * If emulation was caused by a write-protection #PF on a non-page_table
+ * writing instruction, try to unprotect the gfn, i.e. zap shadow pages,
+ * and retry the instruction, as the vCPU is likely no longer using the
+ * gfn as a page table.
+ */
+ if ((emulation_type & EMULTYPE_ALLOW_RETRY_PF) &&
+ !x86_page_table_writing_insn(ctxt) &&
+ kvm_mmu_unprotect_gfn_and_retry(vcpu, cr2_or_gpa))
return 1;
/* this is needed for vmware backdoor interface to work since it
@@ -9285,7 +9212,8 @@ restart:
return 1;
if (r == EMULATION_FAILED) {
- if (reexecute_instruction(vcpu, cr2_or_gpa, emulation_type))
+ if (kvm_unprotect_and_retry_on_failure(vcpu, cr2_or_gpa,
+ emulation_type))
return 1;
return handle_emulation_failure(vcpu, emulation_type);
@@ -9753,7 +9681,7 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
guard(mutex)(&vendor_module_lock);
- if (kvm_x86_ops.hardware_enable) {
+ if (kvm_x86_ops.enable_virtualization_cpu) {
pr_err("already loaded vendor module '%s'\n", kvm_x86_ops.name);
return -EEXIST;
}
@@ -9880,7 +9808,7 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
return 0;
out_unwind_ops:
- kvm_x86_ops.hardware_enable = NULL;
+ kvm_x86_ops.enable_virtualization_cpu = NULL;
kvm_x86_call(hardware_unsetup)();
out_mmu_exit:
kvm_mmu_vendor_module_exit();
@@ -9921,56 +9849,11 @@ void kvm_x86_vendor_exit(void)
WARN_ON(static_branch_unlikely(&kvm_xen_enabled.key));
#endif
mutex_lock(&vendor_module_lock);
- kvm_x86_ops.hardware_enable = NULL;
+ kvm_x86_ops.enable_virtualization_cpu = NULL;
mutex_unlock(&vendor_module_lock);
}
EXPORT_SYMBOL_GPL(kvm_x86_vendor_exit);
-static int __kvm_emulate_halt(struct kvm_vcpu *vcpu, int state, int reason)
-{
- /*
- * The vCPU has halted, e.g. executed HLT. Update the run state if the
- * local APIC is in-kernel, the run loop will detect the non-runnable
- * state and halt the vCPU. Exit to userspace if the local APIC is
- * managed by userspace, in which case userspace is responsible for
- * handling wake events.
- */
- ++vcpu->stat.halt_exits;
- if (lapic_in_kernel(vcpu)) {
- vcpu->arch.mp_state = state;
- return 1;
- } else {
- vcpu->run->exit_reason = reason;
- return 0;
- }
-}
-
-int kvm_emulate_halt_noskip(struct kvm_vcpu *vcpu)
-{
- return __kvm_emulate_halt(vcpu, KVM_MP_STATE_HALTED, KVM_EXIT_HLT);
-}
-EXPORT_SYMBOL_GPL(kvm_emulate_halt_noskip);
-
-int kvm_emulate_halt(struct kvm_vcpu *vcpu)
-{
- int ret = kvm_skip_emulated_instruction(vcpu);
- /*
- * TODO: we might be squashing a GUESTDBG_SINGLESTEP-triggered
- * KVM_EXIT_DEBUG here.
- */
- return kvm_emulate_halt_noskip(vcpu) && ret;
-}
-EXPORT_SYMBOL_GPL(kvm_emulate_halt);
-
-int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu)
-{
- int ret = kvm_skip_emulated_instruction(vcpu);
-
- return __kvm_emulate_halt(vcpu, KVM_MP_STATE_AP_RESET_HOLD,
- KVM_EXIT_AP_RESET_HOLD) && ret;
-}
-EXPORT_SYMBOL_GPL(kvm_emulate_ap_reset_hold);
-
#ifdef CONFIG_X86_64
static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
unsigned long clock_type)
@@ -11207,6 +11090,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (vcpu->arch.apic_attention)
kvm_lapic_sync_from_vapic(vcpu);
+ if (unlikely(exit_fastpath == EXIT_FASTPATH_EXIT_USERSPACE))
+ return 0;
+
r = kvm_x86_call(handle_exit)(vcpu, exit_fastpath);
return r;
@@ -11220,6 +11106,67 @@ out:
return r;
}
+static bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
+{
+ return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
+ !vcpu->arch.apf.halted);
+}
+
+static bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
+{
+ if (!list_empty_careful(&vcpu->async_pf.done))
+ return true;
+
+ if (kvm_apic_has_pending_init_or_sipi(vcpu) &&
+ kvm_apic_init_sipi_allowed(vcpu))
+ return true;
+
+ if (vcpu->arch.pv.pv_unhalted)
+ return true;
+
+ if (kvm_is_exception_pending(vcpu))
+ return true;
+
+ if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
+ (vcpu->arch.nmi_pending &&
+ kvm_x86_call(nmi_allowed)(vcpu, false)))
+ return true;
+
+#ifdef CONFIG_KVM_SMM
+ if (kvm_test_request(KVM_REQ_SMI, vcpu) ||
+ (vcpu->arch.smi_pending &&
+ kvm_x86_call(smi_allowed)(vcpu, false)))
+ return true;
+#endif
+
+ if (kvm_test_request(KVM_REQ_PMI, vcpu))
+ return true;
+
+ if (kvm_test_request(KVM_REQ_UPDATE_PROTECTED_GUEST_STATE, vcpu))
+ return true;
+
+ if (kvm_arch_interrupt_allowed(vcpu) && kvm_cpu_has_interrupt(vcpu))
+ return true;
+
+ if (kvm_hv_has_stimer_pending(vcpu))
+ return true;
+
+ if (is_guest_mode(vcpu) &&
+ kvm_x86_ops.nested_ops->has_events &&
+ kvm_x86_ops.nested_ops->has_events(vcpu, false))
+ return true;
+
+ if (kvm_xen_has_pending_events(vcpu))
+ return true;
+
+ return false;
+}
+
+int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
+{
+ return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu);
+}
+
/* Called within kvm->srcu read side. */
static inline int vcpu_block(struct kvm_vcpu *vcpu)
{
@@ -11291,12 +11238,6 @@ static inline int vcpu_block(struct kvm_vcpu *vcpu)
return 1;
}
-static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
-{
- return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
- !vcpu->arch.apf.halted);
-}
-
/* Called within kvm->srcu read side. */
static int vcpu_run(struct kvm_vcpu *vcpu)
{
@@ -11348,6 +11289,98 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
return r;
}
+static int __kvm_emulate_halt(struct kvm_vcpu *vcpu, int state, int reason)
+{
+ /*
+ * The vCPU has halted, e.g. executed HLT. Update the run state if the
+ * local APIC is in-kernel, the run loop will detect the non-runnable
+ * state and halt the vCPU. Exit to userspace if the local APIC is
+ * managed by userspace, in which case userspace is responsible for
+ * handling wake events.
+ */
+ ++vcpu->stat.halt_exits;
+ if (lapic_in_kernel(vcpu)) {
+ if (kvm_vcpu_has_events(vcpu))
+ vcpu->arch.pv.pv_unhalted = false;
+ else
+ vcpu->arch.mp_state = state;
+ return 1;
+ } else {
+ vcpu->run->exit_reason = reason;
+ return 0;
+ }
+}
+
+int kvm_emulate_halt_noskip(struct kvm_vcpu *vcpu)
+{
+ return __kvm_emulate_halt(vcpu, KVM_MP_STATE_HALTED, KVM_EXIT_HLT);
+}
+EXPORT_SYMBOL_GPL(kvm_emulate_halt_noskip);
+
+int kvm_emulate_halt(struct kvm_vcpu *vcpu)
+{
+ int ret = kvm_skip_emulated_instruction(vcpu);
+ /*
+ * TODO: we might be squashing a GUESTDBG_SINGLESTEP-triggered
+ * KVM_EXIT_DEBUG here.
+ */
+ return kvm_emulate_halt_noskip(vcpu) && ret;
+}
+EXPORT_SYMBOL_GPL(kvm_emulate_halt);
+
+fastpath_t handle_fastpath_hlt(struct kvm_vcpu *vcpu)
+{
+ int ret;
+
+ kvm_vcpu_srcu_read_lock(vcpu);
+ ret = kvm_emulate_halt(vcpu);
+ kvm_vcpu_srcu_read_unlock(vcpu);
+
+ if (!ret)
+ return EXIT_FASTPATH_EXIT_USERSPACE;
+
+ if (kvm_vcpu_running(vcpu))
+ return EXIT_FASTPATH_REENTER_GUEST;
+
+ return EXIT_FASTPATH_EXIT_HANDLED;
+}
+EXPORT_SYMBOL_GPL(handle_fastpath_hlt);
+
+int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu)
+{
+ int ret = kvm_skip_emulated_instruction(vcpu);
+
+ return __kvm_emulate_halt(vcpu, KVM_MP_STATE_AP_RESET_HOLD,
+ KVM_EXIT_AP_RESET_HOLD) && ret;
+}
+EXPORT_SYMBOL_GPL(kvm_emulate_ap_reset_hold);
+
+bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu)
+{
+ return kvm_vcpu_apicv_active(vcpu) &&
+ kvm_x86_call(dy_apicv_has_pending_interrupt)(vcpu);
+}
+
+bool kvm_arch_vcpu_preempted_in_kernel(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.preempted_in_kernel;
+}
+
+bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
+{
+ if (READ_ONCE(vcpu->arch.pv.pv_unhalted))
+ return true;
+
+ if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
+#ifdef CONFIG_KVM_SMM
+ kvm_test_request(KVM_REQ_SMI, vcpu) ||
+#endif
+ kvm_test_request(KVM_REQ_EVENT, vcpu))
+ return true;
+
+ return kvm_arch_dy_has_pending_interrupt(vcpu);
+}
+
static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
{
return kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
@@ -12264,8 +12297,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
vcpu->arch.reserved_gpa_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu);
- vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT;
-
kvm_async_pf_hash_reset(vcpu);
vcpu->arch.perf_capabilities = kvm_caps.supported_perf_cap;
@@ -12431,6 +12462,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
if (!init_event) {
vcpu->arch.smbase = 0x30000;
+ vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT;
+
vcpu->arch.msr_misc_features_enables = 0;
vcpu->arch.ia32_misc_enable_msr = MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL |
MSR_IA32_MISC_ENABLE_BTS_UNAVAIL;
@@ -12516,7 +12549,17 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
}
EXPORT_SYMBOL_GPL(kvm_vcpu_deliver_sipi_vector);
-int kvm_arch_hardware_enable(void)
+void kvm_arch_enable_virtualization(void)
+{
+ cpu_emergency_register_virt_callback(kvm_x86_ops.emergency_disable_virtualization_cpu);
+}
+
+void kvm_arch_disable_virtualization(void)
+{
+ cpu_emergency_unregister_virt_callback(kvm_x86_ops.emergency_disable_virtualization_cpu);
+}
+
+int kvm_arch_enable_virtualization_cpu(void)
{
struct kvm *kvm;
struct kvm_vcpu *vcpu;
@@ -12532,7 +12575,7 @@ int kvm_arch_hardware_enable(void)
if (ret)
return ret;
- ret = kvm_x86_call(hardware_enable)();
+ ret = kvm_x86_call(enable_virtualization_cpu)();
if (ret != 0)
return ret;
@@ -12612,9 +12655,9 @@ int kvm_arch_hardware_enable(void)
return 0;
}
-void kvm_arch_hardware_disable(void)
+void kvm_arch_disable_virtualization_cpu(void)
{
- kvm_x86_call(hardware_disable)();
+ kvm_x86_call(disable_virtualization_cpu)();
drop_user_return_notifiers();
}
@@ -13162,87 +13205,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
kvm_arch_free_memslot(kvm, old);
}
-static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
-{
- if (!list_empty_careful(&vcpu->async_pf.done))
- return true;
-
- if (kvm_apic_has_pending_init_or_sipi(vcpu) &&
- kvm_apic_init_sipi_allowed(vcpu))
- return true;
-
- if (vcpu->arch.pv.pv_unhalted)
- return true;
-
- if (kvm_is_exception_pending(vcpu))
- return true;
-
- if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
- (vcpu->arch.nmi_pending &&
- kvm_x86_call(nmi_allowed)(vcpu, false)))
- return true;
-
-#ifdef CONFIG_KVM_SMM
- if (kvm_test_request(KVM_REQ_SMI, vcpu) ||
- (vcpu->arch.smi_pending &&
- kvm_x86_call(smi_allowed)(vcpu, false)))
- return true;
-#endif
-
- if (kvm_test_request(KVM_REQ_PMI, vcpu))
- return true;
-
- if (kvm_test_request(KVM_REQ_UPDATE_PROTECTED_GUEST_STATE, vcpu))
- return true;
-
- if (kvm_arch_interrupt_allowed(vcpu) && kvm_cpu_has_interrupt(vcpu))
- return true;
-
- if (kvm_hv_has_stimer_pending(vcpu))
- return true;
-
- if (is_guest_mode(vcpu) &&
- kvm_x86_ops.nested_ops->has_events &&
- kvm_x86_ops.nested_ops->has_events(vcpu, false))
- return true;
-
- if (kvm_xen_has_pending_events(vcpu))
- return true;
-
- return false;
-}
-
-int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
-{
- return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu);
-}
-
-bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu)
-{
- return kvm_vcpu_apicv_active(vcpu) &&
- kvm_x86_call(dy_apicv_has_pending_interrupt)(vcpu);
-}
-
-bool kvm_arch_vcpu_preempted_in_kernel(struct kvm_vcpu *vcpu)
-{
- return vcpu->arch.preempted_in_kernel;
-}
-
-bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
-{
- if (READ_ONCE(vcpu->arch.pv.pv_unhalted))
- return true;
-
- if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
-#ifdef CONFIG_KVM_SMM
- kvm_test_request(KVM_REQ_SMI, vcpu) ||
-#endif
- kvm_test_request(KVM_REQ_EVENT, vcpu))
- return true;
-
- return kvm_arch_dy_has_pending_interrupt(vcpu);
-}
-
bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
{
if (vcpu->arch.guest_state_protected)
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 50596f6f8320..a84c48ef5278 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -103,11 +103,18 @@ static inline unsigned int __shrink_ple_window(unsigned int val,
return max(val, min);
}
-#define MSR_IA32_CR_PAT_DEFAULT 0x0007040600070406ULL
+#define MSR_IA32_CR_PAT_DEFAULT \
+ PAT_VALUE(WB, WT, UC_MINUS, UC, WB, WT, UC_MINUS, UC)
void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu);
int kvm_check_nested_events(struct kvm_vcpu *vcpu);
+/* Forcibly leave the nested mode in cases like a vCPU reset */
+static inline void kvm_leave_nested(struct kvm_vcpu *vcpu)
+{
+ kvm_x86_ops.nested_ops->leave_nested(vcpu);
+}
+
static inline bool kvm_vcpu_has_run(struct kvm_vcpu *vcpu)
{
return vcpu->arch.last_vmentry_cpu != -1;
@@ -334,6 +341,7 @@ int x86_decode_emulated_instruction(struct kvm_vcpu *vcpu, int emulation_type,
int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
int emulation_type, void *insn, int insn_len);
fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu);
+fastpath_t handle_fastpath_hlt(struct kvm_vcpu *vcpu);
extern struct kvm_caps kvm_caps;
extern struct kvm_host_values kvm_host;
@@ -504,13 +512,26 @@ int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r,
int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva);
bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type);
+enum kvm_msr_access {
+ MSR_TYPE_R = BIT(0),
+ MSR_TYPE_W = BIT(1),
+ MSR_TYPE_RW = MSR_TYPE_R | MSR_TYPE_W,
+};
+
/*
* Internal error codes that are used to indicate that MSR emulation encountered
- * an error that should result in #GP in the guest, unless userspace
- * handles it.
+ * an error that should result in #GP in the guest, unless userspace handles it.
+ * Note, '1', '0', and negative numbers are off limits, as they are used by KVM
+ * as part of KVM's lightly documented internal KVM_RUN return codes.
+ *
+ * UNSUPPORTED - The MSR isn't supported, either because it is completely
+ * unknown to KVM, or because the MSR should not exist according
+ * to the vCPU model.
+ *
+ * FILTERED - Access to the MSR is denied by a userspace MSR filter.
*/
-#define KVM_MSR_RET_INVALID 2 /* in-kernel MSR emulation #GP condition */
-#define KVM_MSR_RET_FILTERED 3 /* #GP due to userspace MSR filter */
+#define KVM_MSR_RET_UNSUPPORTED 2
+#define KVM_MSR_RET_FILTERED 3
#define __cr4_reserved_bits(__cpu_has, __c) \
({ \
diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S
index 90afb488b396..b2eff07d65e4 100644
--- a/arch/x86/lib/atomic64_cx8_32.S
+++ b/arch/x86/lib/atomic64_cx8_32.S
@@ -16,6 +16,11 @@
cmpxchg8b (\reg)
.endm
+.macro read64_nonatomic reg
+ movl (\reg), %eax
+ movl 4(\reg), %edx
+.endm
+
SYM_FUNC_START(atomic64_read_cx8)
read64 %ecx
RET
@@ -51,7 +56,7 @@ SYM_FUNC_START(atomic64_\func\()_return_cx8)
movl %edx, %edi
movl %ecx, %ebp
- read64 %ecx
+ read64_nonatomic %ecx
1:
movl %eax, %ebx
movl %edx, %ecx
@@ -79,7 +84,7 @@ addsub_return sub sub sbb
SYM_FUNC_START(atomic64_\func\()_return_cx8)
pushl %ebx
- read64 %esi
+ read64_nonatomic %esi
1:
movl %eax, %ebx
movl %edx, %ecx
diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
index f73b5ce270b3..feb8cc6a12bf 100644
--- a/arch/x86/mm/pat/memtype.c
+++ b/arch/x86/mm/pat/memtype.c
@@ -176,15 +176,6 @@ static inline void set_page_memtype(struct page *pg,
}
#endif
-enum {
- PAT_UC = 0, /* uncached */
- PAT_WC = 1, /* Write combining */
- PAT_WT = 4, /* Write Through */
- PAT_WP = 5, /* Write Protected */
- PAT_WB = 6, /* Write Back (default) */
- PAT_UC_MINUS = 7, /* UC, but can be overridden by MTRR */
-};
-
#define CM(c) (_PAGE_CACHE_MODE_ ## c)
static enum page_cache_mode __init pat_get_cache_mode(unsigned int pat_val,
@@ -194,13 +185,13 @@ static enum page_cache_mode __init pat_get_cache_mode(unsigned int pat_val,
char *cache_mode;
switch (pat_val) {
- case PAT_UC: cache = CM(UC); cache_mode = "UC "; break;
- case PAT_WC: cache = CM(WC); cache_mode = "WC "; break;
- case PAT_WT: cache = CM(WT); cache_mode = "WT "; break;
- case PAT_WP: cache = CM(WP); cache_mode = "WP "; break;
- case PAT_WB: cache = CM(WB); cache_mode = "WB "; break;
- case PAT_UC_MINUS: cache = CM(UC_MINUS); cache_mode = "UC- "; break;
- default: cache = CM(WB); cache_mode = "WB "; break;
+ case X86_MEMTYPE_UC: cache = CM(UC); cache_mode = "UC "; break;
+ case X86_MEMTYPE_WC: cache = CM(WC); cache_mode = "WC "; break;
+ case X86_MEMTYPE_WT: cache = CM(WT); cache_mode = "WT "; break;
+ case X86_MEMTYPE_WP: cache = CM(WP); cache_mode = "WP "; break;
+ case X86_MEMTYPE_WB: cache = CM(WB); cache_mode = "WB "; break;
+ case X86_MEMTYPE_UC_MINUS: cache = CM(UC_MINUS); cache_mode = "UC- "; break;
+ default: cache = CM(WB); cache_mode = "WB "; break;
}
memcpy(msg, cache_mode, 4);
@@ -257,12 +248,6 @@ void pat_cpu_init(void)
void __init pat_bp_init(void)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
-#define PAT(p0, p1, p2, p3, p4, p5, p6, p7) \
- (((u64)PAT_ ## p0) | ((u64)PAT_ ## p1 << 8) | \
- ((u64)PAT_ ## p2 << 16) | ((u64)PAT_ ## p3 << 24) | \
- ((u64)PAT_ ## p4 << 32) | ((u64)PAT_ ## p5 << 40) | \
- ((u64)PAT_ ## p6 << 48) | ((u64)PAT_ ## p7 << 56))
-
if (!IS_ENABLED(CONFIG_X86_PAT))
pr_info_once("x86/PAT: PAT support disabled because CONFIG_X86_PAT is disabled in the kernel.\n");
@@ -293,7 +278,7 @@ void __init pat_bp_init(void)
* NOTE: When WC or WP is used, it is redirected to UC- per
* the default setup in __cachemode2pte_tbl[].
*/
- pat_msr_val = PAT(WB, WT, UC_MINUS, UC, WB, WT, UC_MINUS, UC);
+ pat_msr_val = PAT_VALUE(WB, WT, UC_MINUS, UC, WB, WT, UC_MINUS, UC);
}
/*
@@ -328,7 +313,7 @@ void __init pat_bp_init(void)
* NOTE: When WT or WP is used, it is redirected to UC- per
* the default setup in __cachemode2pte_tbl[].
*/
- pat_msr_val = PAT(WB, WC, UC_MINUS, UC, WB, WC, UC_MINUS, UC);
+ pat_msr_val = PAT_VALUE(WB, WC, UC_MINUS, UC, WB, WC, UC_MINUS, UC);
} else {
/*
* Full PAT support. We put WT in slot 7 to improve
@@ -356,13 +341,12 @@ void __init pat_bp_init(void)
* The reserved slots are unused, but mapped to their
* corresponding types in the presence of PAT errata.
*/
- pat_msr_val = PAT(WB, WC, UC_MINUS, UC, WB, WP, UC_MINUS, WT);
+ pat_msr_val = PAT_VALUE(WB, WC, UC_MINUS, UC, WB, WP, UC_MINUS, WT);
}
memory_caching_control |= CACHE_PAT;
init_cache_modes(pat_msr_val);
-#undef PAT
}
static DEFINE_SPINLOCK(memtype_lock); /* protects memtype accesses */
diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S
index f7235ef87bc3..64fca49cd88f 100644
--- a/arch/x86/platform/pvh/head.S
+++ b/arch/x86/platform/pvh/head.S
@@ -7,6 +7,7 @@
.code32
.text
#define _pa(x) ((x) - __START_KERNEL_map)
+#define rva(x) ((x) - pvh_start_xen)
#include <linux/elfnote.h>
#include <linux/init.h>
@@ -15,6 +16,7 @@
#include <asm/segment.h>
#include <asm/asm.h>
#include <asm/boot.h>
+#include <asm/pgtable.h>
#include <asm/processor-flags.h>
#include <asm/msr.h>
#include <asm/nospec-branch.h>
@@ -54,7 +56,25 @@ SYM_CODE_START_LOCAL(pvh_start_xen)
UNWIND_HINT_END_OF_STACK
cld
- lgdt (_pa(gdt))
+ /*
+ * See the comment for startup_32 for more details. We need to
+ * execute a call to get the execution address to be position
+ * independent, but we don't have a stack. Save and restore the
+ * magic field of start_info in ebx, and use that as the stack.
+ */
+ mov (%ebx), %eax
+ leal 4(%ebx), %esp
+ ANNOTATE_INTRA_FUNCTION_CALL
+ call 1f
+1: popl %ebp
+ mov %eax, (%ebx)
+ subl $rva(1b), %ebp
+ movl $0, %esp
+
+ leal rva(gdt)(%ebp), %eax
+ leal rva(gdt_start)(%ebp), %ecx
+ movl %ecx, 2(%eax)
+ lgdt (%eax)
mov $PVH_DS_SEL,%eax
mov %eax,%ds
@@ -62,14 +82,14 @@ SYM_CODE_START_LOCAL(pvh_start_xen)
mov %eax,%ss
/* Stash hvm_start_info. */
- mov $_pa(pvh_start_info), %edi
+ leal rva(pvh_start_info)(%ebp), %edi
mov %ebx, %esi
- mov _pa(pvh_start_info_sz), %ecx
+ movl rva(pvh_start_info_sz)(%ebp), %ecx
shr $2,%ecx
rep
movsl
- mov $_pa(early_stack_end), %esp
+ leal rva(early_stack_end)(%ebp), %esp
/* Enable PAE mode. */
mov %cr4, %eax
@@ -83,31 +103,86 @@ SYM_CODE_START_LOCAL(pvh_start_xen)
btsl $_EFER_LME, %eax
wrmsr
+ mov %ebp, %ebx
+ subl $_pa(pvh_start_xen), %ebx /* offset */
+ jz .Lpagetable_done
+
+ /* Fixup page-tables for relocation. */
+ leal rva(pvh_init_top_pgt)(%ebp), %edi
+ movl $PTRS_PER_PGD, %ecx
+2:
+ testl $_PAGE_PRESENT, 0x00(%edi)
+ jz 1f
+ addl %ebx, 0x00(%edi)
+1:
+ addl $8, %edi
+ decl %ecx
+ jnz 2b
+
+ /* L3 ident has a single entry. */
+ leal rva(pvh_level3_ident_pgt)(%ebp), %edi
+ addl %ebx, 0x00(%edi)
+
+ leal rva(pvh_level3_kernel_pgt)(%ebp), %edi
+ addl %ebx, (PAGE_SIZE - 16)(%edi)
+ addl %ebx, (PAGE_SIZE - 8)(%edi)
+
+ /* pvh_level2_ident_pgt is fine - large pages */
+
+ /* pvh_level2_kernel_pgt needs adjustment - large pages */
+ leal rva(pvh_level2_kernel_pgt)(%ebp), %edi
+ movl $PTRS_PER_PMD, %ecx
+2:
+ testl $_PAGE_PRESENT, 0x00(%edi)
+ jz 1f
+ addl %ebx, 0x00(%edi)
+1:
+ addl $8, %edi
+ decl %ecx
+ jnz 2b
+
+.Lpagetable_done:
/* Enable pre-constructed page tables. */
- mov $_pa(init_top_pgt), %eax
+ leal rva(pvh_init_top_pgt)(%ebp), %eax
mov %eax, %cr3
mov $(X86_CR0_PG | X86_CR0_PE), %eax
mov %eax, %cr0
/* Jump to 64-bit mode. */
- ljmp $PVH_CS_SEL, $_pa(1f)
+ pushl $PVH_CS_SEL
+ leal rva(1f)(%ebp), %eax
+ pushl %eax
+ lretl
/* 64-bit entry point. */
.code64
1:
+ UNWIND_HINT_END_OF_STACK
+
/* Set base address in stack canary descriptor. */
mov $MSR_GS_BASE,%ecx
- mov $_pa(canary), %eax
+ leal canary(%rip), %eax
xor %edx, %edx
wrmsr
+ /*
+ * Calculate load offset and store in phys_base. __pa() needs
+ * phys_base set to calculate the hypercall page in xen_pvh_init().
+ */
+ movq %rbp, %rbx
+ subq $_pa(pvh_start_xen), %rbx
+ movq %rbx, phys_base(%rip)
call xen_prepare_pvh
+ /*
+ * Clear phys_base. __startup_64 will *add* to its value,
+ * so reset to 0.
+ */
+ xor %rbx, %rbx
+ movq %rbx, phys_base(%rip)
/* startup_64 expects boot_params in %rsi. */
- mov $_pa(pvh_bootparams), %rsi
- mov $_pa(startup_64), %rax
- ANNOTATE_RETPOLINE_SAFE
- jmp *%rax
+ lea pvh_bootparams(%rip), %rsi
+ jmp startup_64
#else /* CONFIG_X86_64 */
@@ -143,7 +218,7 @@ SYM_CODE_END(pvh_start_xen)
.balign 8
SYM_DATA_START_LOCAL(gdt)
.word gdt_end - gdt_start
- .long _pa(gdt_start)
+ .long _pa(gdt_start) /* x86-64 will overwrite if relocated. */
.word 0
SYM_DATA_END(gdt)
SYM_DATA_START_LOCAL(gdt_start)
@@ -163,5 +238,67 @@ SYM_DATA_START_LOCAL(early_stack)
.fill BOOT_STACK_SIZE, 1, 0
SYM_DATA_END_LABEL(early_stack, SYM_L_LOCAL, early_stack_end)
+#ifdef CONFIG_X86_64
+/*
+ * Xen PVH needs a set of identity mapped and kernel high mapping
+ * page tables. pvh_start_xen starts running on the identity mapped
+ * page tables, but xen_prepare_pvh calls into the high mapping.
+ * These page tables need to be relocatable and are only used until
+ * startup_64 transitions to init_top_pgt.
+ */
+SYM_DATA_START_PAGE_ALIGNED(pvh_init_top_pgt)
+ .quad pvh_level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
+ .org pvh_init_top_pgt + L4_PAGE_OFFSET * 8, 0
+ .quad pvh_level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
+ .org pvh_init_top_pgt + L4_START_KERNEL * 8, 0
+ /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
+ .quad pvh_level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
+SYM_DATA_END(pvh_init_top_pgt)
+
+SYM_DATA_START_PAGE_ALIGNED(pvh_level3_ident_pgt)
+ .quad pvh_level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
+ .fill 511, 8, 0
+SYM_DATA_END(pvh_level3_ident_pgt)
+SYM_DATA_START_PAGE_ALIGNED(pvh_level2_ident_pgt)
+ /*
+ * Since I easily can, map the first 1G.
+ * Don't set NX because code runs from these pages.
+ *
+ * Note: This sets _PAGE_GLOBAL despite whether
+ * the CPU supports it or it is enabled. But,
+ * the CPU should ignore the bit.
+ */
+ PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
+SYM_DATA_END(pvh_level2_ident_pgt)
+SYM_DATA_START_PAGE_ALIGNED(pvh_level3_kernel_pgt)
+ .fill L3_START_KERNEL, 8, 0
+ /* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
+ .quad pvh_level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
+ .quad 0 /* no fixmap */
+SYM_DATA_END(pvh_level3_kernel_pgt)
+
+SYM_DATA_START_PAGE_ALIGNED(pvh_level2_kernel_pgt)
+ /*
+ * Kernel high mapping.
+ *
+ * The kernel code+data+bss must be located below KERNEL_IMAGE_SIZE in
+ * virtual address space, which is 1 GiB if RANDOMIZE_BASE is enabled,
+ * 512 MiB otherwise.
+ *
+ * (NOTE: after that starts the module area, see MODULES_VADDR.)
+ *
+ * This table is eventually used by the kernel during normal runtime.
+ * Care must be taken to clear out undesired bits later, like _PAGE_RW
+ * or _PAGE_GLOBAL in some cases.
+ */
+ PMDS(0, __PAGE_KERNEL_LARGE_EXEC, KERNEL_IMAGE_SIZE / PMD_SIZE)
+SYM_DATA_END(pvh_level2_kernel_pgt)
+
+ ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_RELOC,
+ .long CONFIG_PHYSICAL_ALIGN;
+ .long LOAD_PHYSICAL_ADDR;
+ .long KERNEL_IMAGE_SIZE - 1)
+#endif
+
ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY,
_ASM_PTR (pvh_start_xen - __START_KERNEL_map))
diff --git a/arch/x86/um/sysrq_32.c b/arch/x86/um/sysrq_32.c
index f2383484840d..a1ee415c008d 100644
--- a/arch/x86/um/sysrq_32.c
+++ b/arch/x86/um/sysrq_32.c
@@ -9,7 +9,6 @@
#include <linux/sched/debug.h>
#include <linux/kallsyms.h>
#include <asm/ptrace.h>
-#include <asm/sysrq.h>
/* This is declared by <linux/sched.h> */
void show_regs(struct pt_regs *regs)
diff --git a/arch/x86/um/sysrq_64.c b/arch/x86/um/sysrq_64.c
index 0bf6de40abff..340d8a243c8a 100644
--- a/arch/x86/um/sysrq_64.c
+++ b/arch/x86/um/sysrq_64.c
@@ -12,7 +12,6 @@
#include <linux/utsname.h>
#include <asm/current.h>
#include <asm/ptrace.h>
-#include <asm/sysrq.h>
void show_regs(struct pt_regs *regs)
{
diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c
index 728a4366ca85..bf68c329fc01 100644
--- a/arch/x86/xen/enlighten_pvh.c
+++ b/arch/x86/xen/enlighten_pvh.c
@@ -4,6 +4,7 @@
#include <linux/mm.h>
#include <xen/hvc-console.h>
+#include <xen/acpi.h>
#include <asm/bootparam.h>
#include <asm/io_apic.h>
@@ -28,6 +29,28 @@
bool __ro_after_init xen_pvh;
EXPORT_SYMBOL_GPL(xen_pvh);
+#ifdef CONFIG_XEN_DOM0
+int xen_pvh_setup_gsi(int gsi, int trigger, int polarity)
+{
+ int ret;
+ struct physdev_setup_gsi setup_gsi;
+
+ setup_gsi.gsi = gsi;
+ setup_gsi.triggering = (trigger == ACPI_EDGE_SENSITIVE ? 0 : 1);
+ setup_gsi.polarity = (polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
+
+ ret = HYPERVISOR_physdev_op(PHYSDEVOP_setup_gsi, &setup_gsi);
+ if (ret == -EEXIST) {
+ xen_raw_printk("Already setup the GSI :%d\n", gsi);
+ ret = 0;
+ } else if (ret)
+ xen_raw_printk("Fail to setup GSI (%d)!\n", gsi);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(xen_pvh_setup_gsi);
+#endif
+
/*
* Reserve e820 UNUSABLE regions to inflate the memory balloon.
*
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index e3a7c2aedd5f..d67f63d93b2a 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -451,7 +451,7 @@ config ACPI_HED
config ACPI_BGRT
bool "Boottime Graphics Resource Table support"
- depends on EFI && (X86 || ARM64)
+ depends on EFI && (X86 || ARM64 || LOONGARCH)
help
This driver adds support for exposing the ACPI Boottime Graphics
Resource Table, which allows the operating system to obtain
diff --git a/drivers/acpi/apei/einj-cxl.c b/drivers/acpi/apei/einj-cxl.c
index 8b8be0c90709..4f81a119ec08 100644
--- a/drivers/acpi/apei/einj-cxl.c
+++ b/drivers/acpi/apei/einj-cxl.c
@@ -7,9 +7,9 @@
*
* Author: Ben Cheatham <benjamin.cheatham@amd.com>
*/
-#include <linux/einj-cxl.h>
#include <linux/seq_file.h>
#include <linux/pci.h>
+#include <cxl/einj.h>
#include "apei-internal.h"
diff --git a/drivers/acpi/apei/erst-dbg.c b/drivers/acpi/apei/erst-dbg.c
index 8bc71cdc2270..246076341e8c 100644
--- a/drivers/acpi/apei/erst-dbg.c
+++ b/drivers/acpi/apei/erst-dbg.c
@@ -199,7 +199,6 @@ static const struct file_operations erst_dbg_ops = {
.read = erst_dbg_read,
.write = erst_dbg_write,
.unlocked_ioctl = erst_dbg_ioctl,
- .llseek = no_llseek,
};
static struct miscdevice erst_dbg_dev = {
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 623cc0cb4a65..ada93cfde9ba 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -27,7 +27,6 @@
#include <linux/timer.h>
#include <linux/cper.h>
#include <linux/cleanup.h>
-#include <linux/cxl-event.h>
#include <linux/platform_device.h>
#include <linux/mutex.h>
#include <linux/ratelimit.h>
@@ -50,6 +49,7 @@
#include <acpi/apei.h>
#include <asm/fixmap.h>
#include <asm/tlbflush.h>
+#include <cxl/event.h>
#include <ras/ras_event.h>
#include "apei-internal.h"
diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
index ff30ceca2203..630fe0a34bc6 100644
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c
@@ -288,7 +288,7 @@ static int acpi_reroute_boot_interrupt(struct pci_dev *dev,
}
#endif /* CONFIG_X86_IO_APIC */
-static struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin)
+struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin)
{
struct acpi_prt_entry *entry = NULL;
struct pci_dev *bridge;
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 3328a6febc13..a4aedf7e1775 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -2256,10 +2256,15 @@ static inline u16 ata_xlat_cdl_limit(u8 *buf)
static unsigned int ata_msense_control_spgt2(struct ata_device *dev, u8 *buf,
u8 spg)
{
- u8 *b, *cdl = dev->cdl->desc_log_buf, *desc;
+ u8 *b, *cdl, *desc;
u32 policy;
int i;
+ if (!(dev->flags & ATA_DFLAG_CDL) || !dev->cdl)
+ return 0;
+
+ cdl = dev->cdl->desc_log_buf;
+
/*
* Fill the subpage. The first four bytes of the T2A/T2B mode pages
* are a header. The PAGE LENGTH field is the size of the page
@@ -2356,7 +2361,7 @@ static unsigned int ata_msense_control(struct ata_device *dev, u8 *buf,
case ALL_SUB_MPAGES:
n = ata_msense_control_spg0(dev, buf, changeable);
n += ata_msense_control_spgt2(dev, buf + n, CDL_T2A_SUB_MPAGE);
- n += ata_msense_control_spgt2(dev, buf + n, CDL_T2A_SUB_MPAGE);
+ n += ata_msense_control_spgt2(dev, buf + n, CDL_T2B_SUB_MPAGE);
n += ata_msense_control_ata_feature(dev, buf + n);
return n;
default:
diff --git a/drivers/auxdisplay/charlcd.c b/drivers/auxdisplay/charlcd.c
index bb9463814454..19b619376d48 100644
--- a/drivers/auxdisplay/charlcd.c
+++ b/drivers/auxdisplay/charlcd.c
@@ -526,7 +526,6 @@ static const struct file_operations charlcd_fops = {
.write = charlcd_write,
.open = charlcd_open,
.release = charlcd_release,
- .llseek = no_llseek,
};
static struct miscdevice charlcd_dev = {
diff --git a/drivers/base/attribute_container.c b/drivers/base/attribute_container.c
index 01ef796c2055..b6f941a6ab69 100644
--- a/drivers/base/attribute_container.c
+++ b/drivers/base/attribute_container.c
@@ -346,8 +346,7 @@ attribute_container_device_trigger_safe(struct device *dev,
* @fn: the function to execute for each classdev.
*
* This function is for executing a trigger when you need to know both
- * the container and the classdev. If you only care about the
- * container, then use attribute_container_trigger() instead.
+ * the container and the classdev.
*/
void
attribute_container_device_trigger(struct device *dev,
@@ -379,33 +378,6 @@ attribute_container_device_trigger(struct device *dev,
}
/**
- * attribute_container_trigger - trigger a function for each matching container
- *
- * @dev: The generic device to activate the trigger for
- * @fn: the function to trigger
- *
- * This routine triggers a function that only needs to know the
- * matching containers (not the classdev) associated with a device.
- * It is more lightweight than attribute_container_device_trigger, so
- * should be used in preference unless the triggering function
- * actually needs to know the classdev.
- */
-void
-attribute_container_trigger(struct device *dev,
- int (*fn)(struct attribute_container *,
- struct device *))
-{
- struct attribute_container *cont;
-
- mutex_lock(&attribute_container_mutex);
- list_for_each_entry(cont, &attribute_container_list, node) {
- if (cont->match(cont, dev))
- fn(cont, dev);
- }
- mutex_unlock(&attribute_container_mutex);
-}
-
-/**
* attribute_container_add_attrs - add attributes
*
* @classdev: The class device
@@ -459,24 +431,6 @@ attribute_container_add_class_device(struct device *classdev)
}
/**
- * attribute_container_add_class_device_adapter - simple adapter for triggers
- *
- * @cont: the container to register.
- * @dev: the generic device to activate the trigger for
- * @classdev: the class device to add
- *
- * This function is identical to attribute_container_add_class_device except
- * that it is designed to be called from the triggers
- */
-int
-attribute_container_add_class_device_adapter(struct attribute_container *cont,
- struct device *dev,
- struct device *classdev)
-{
- return attribute_container_add_class_device(classdev);
-}
-
-/**
* attribute_container_remove_attrs - remove any attribute files
*
* @classdev: The class device to remove the files from
diff --git a/drivers/base/auxiliary.c b/drivers/base/auxiliary.c
index 54b92839e05c..7823888af4f6 100644
--- a/drivers/base/auxiliary.c
+++ b/drivers/base/auxiliary.c
@@ -352,7 +352,7 @@ EXPORT_SYMBOL_GPL(__auxiliary_device_add);
*/
struct auxiliary_device *auxiliary_find_device(struct device *start,
const void *data,
- int (*match)(struct device *dev, const void *data))
+ device_match_t match)
{
struct device *dev;
diff --git a/drivers/base/base.h b/drivers/base/base.h
index 0b53593372d7..8cf04a557bdb 100644
--- a/drivers/base/base.h
+++ b/drivers/base/base.h
@@ -145,7 +145,7 @@ void auxiliary_bus_init(void);
static inline void auxiliary_bus_init(void) { }
#endif
-struct kobject *virtual_device_parent(struct device *dev);
+struct kobject *virtual_device_parent(void);
int bus_add_device(struct device *dev);
void bus_probe_device(struct device *dev);
diff --git a/drivers/base/bus.c b/drivers/base/bus.c
index ffea0728b8b2..657c93c38b0d 100644
--- a/drivers/base/bus.c
+++ b/drivers/base/bus.c
@@ -152,7 +152,8 @@ static ssize_t bus_attr_show(struct kobject *kobj, struct attribute *attr,
{
struct bus_attribute *bus_attr = to_bus_attr(attr);
struct subsys_private *subsys_priv = to_subsys_private(kobj);
- ssize_t ret = 0;
+ /* return -EIO for reading a bus attribute without show() */
+ ssize_t ret = -EIO;
if (bus_attr->show)
ret = bus_attr->show(subsys_priv->bus, buf);
@@ -164,7 +165,8 @@ static ssize_t bus_attr_store(struct kobject *kobj, struct attribute *attr,
{
struct bus_attribute *bus_attr = to_bus_attr(attr);
struct subsys_private *subsys_priv = to_subsys_private(kobj);
- ssize_t ret = 0;
+ /* return -EIO for writing a bus attribute without store() */
+ ssize_t ret = -EIO;
if (bus_attr->store)
ret = bus_attr->store(subsys_priv->bus, buf, count);
@@ -389,7 +391,7 @@ EXPORT_SYMBOL_GPL(bus_for_each_dev);
*/
struct device *bus_find_device(const struct bus_type *bus,
struct device *start, const void *data,
- int (*match)(struct device *dev, const void *data))
+ device_match_t match)
{
struct subsys_private *sp = bus_to_subsys(bus);
struct klist_iter i;
@@ -920,6 +922,8 @@ bus_devices_fail:
bus_remove_file(bus, &bus_attr_uevent);
bus_uevent_fail:
kset_unregister(&priv->subsys);
+ /* Above kset_unregister() will kfree @priv */
+ priv = NULL;
out:
kfree(priv);
return retval;
@@ -1294,7 +1298,7 @@ int subsys_virtual_register(const struct bus_type *subsys,
{
struct kobject *virtual_dir;
- virtual_dir = virtual_device_parent(NULL);
+ virtual_dir = virtual_device_parent();
if (!virtual_dir)
return -ENOMEM;
@@ -1385,8 +1389,13 @@ int __init buses_init(void)
return -ENOMEM;
system_kset = kset_create_and_add("system", NULL, &devices_kset->kobj);
- if (!system_kset)
+ if (!system_kset) {
+ /* Do error handling here as devices_init() do */
+ kset_unregister(bus_kset);
+ bus_kset = NULL;
+ pr_err("%s: failed to create and add kset 'bus'\n", __func__);
return -ENOMEM;
+ }
return 0;
}
diff --git a/drivers/base/class.c b/drivers/base/class.c
index 7b38fdf8e1d7..cb5359235c70 100644
--- a/drivers/base/class.c
+++ b/drivers/base/class.c
@@ -183,6 +183,17 @@ int class_register(const struct class *cls)
pr_debug("device class '%s': registering\n", cls->name);
+ if (cls->ns_type && !cls->namespace) {
+ pr_err("%s: class '%s' does not have namespace\n",
+ __func__, cls->name);
+ return -EINVAL;
+ }
+ if (!cls->ns_type && cls->namespace) {
+ pr_err("%s: class '%s' does not have ns_type\n",
+ __func__, cls->name);
+ return -EINVAL;
+ }
+
cp = kzalloc(sizeof(*cp), GFP_KERNEL);
if (!cp)
return -ENOMEM;
@@ -433,8 +444,7 @@ EXPORT_SYMBOL_GPL(class_for_each_device);
* code. There's no locking restriction.
*/
struct device *class_find_device(const struct class *class, const struct device *start,
- const void *data,
- int (*match)(struct device *, const void *))
+ const void *data, device_match_t match)
{
struct subsys_private *sp = class_to_subsys(class);
struct class_dev_iter iter;
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 8c0733d3aad8..a4c853411a6b 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -9,29 +9,30 @@
*/
#include <linux/acpi.h>
+#include <linux/blkdev.h>
+#include <linux/cleanup.h>
#include <linux/cpufreq.h>
#include <linux/device.h>
+#include <linux/dma-map-ops.h> /* for dma_default_coherent */
#include <linux/err.h>
#include <linux/fwnode.h>
#include <linux/init.h>
+#include <linux/kdev_t.h>
#include <linux/kstrtox.h>
#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/kdev_t.h>
+#include <linux/mutex.h>
+#include <linux/netdevice.h>
#include <linux/notifier.h>
#include <linux/of.h>
#include <linux/of_device.h>
-#include <linux/blkdev.h>
-#include <linux/mutex.h>
#include <linux/pm_runtime.h>
-#include <linux/netdevice.h>
#include <linux/rcupdate.h>
-#include <linux/sched/signal.h>
#include <linux/sched/mm.h>
+#include <linux/sched/signal.h>
+#include <linux/slab.h>
#include <linux/string_helpers.h>
#include <linux/swiotlb.h>
#include <linux/sysfs.h>
-#include <linux/dma-map-ops.h> /* for dma_default_coherent */
#include "base.h"
#include "physical_location.h"
@@ -97,12 +98,9 @@ static int __fwnode_link_add(struct fwnode_handle *con,
int fwnode_link_add(struct fwnode_handle *con, struct fwnode_handle *sup,
u8 flags)
{
- int ret;
+ guard(mutex)(&fwnode_link_lock);
- mutex_lock(&fwnode_link_lock);
- ret = __fwnode_link_add(con, sup, flags);
- mutex_unlock(&fwnode_link_lock);
- return ret;
+ return __fwnode_link_add(con, sup, flags);
}
/**
@@ -143,10 +141,10 @@ static void fwnode_links_purge_suppliers(struct fwnode_handle *fwnode)
{
struct fwnode_link *link, *tmp;
- mutex_lock(&fwnode_link_lock);
+ guard(mutex)(&fwnode_link_lock);
+
list_for_each_entry_safe(link, tmp, &fwnode->suppliers, c_hook)
__fwnode_link_del(link);
- mutex_unlock(&fwnode_link_lock);
}
/**
@@ -159,10 +157,10 @@ static void fwnode_links_purge_consumers(struct fwnode_handle *fwnode)
{
struct fwnode_link *link, *tmp;
- mutex_lock(&fwnode_link_lock);
+ guard(mutex)(&fwnode_link_lock);
+
list_for_each_entry_safe(link, tmp, &fwnode->consumers, s_hook)
__fwnode_link_del(link);
- mutex_unlock(&fwnode_link_lock);
}
/**
@@ -563,20 +561,11 @@ static struct class devlink_class = {
static int devlink_add_symlinks(struct device *dev)
{
+ char *buf_con __free(kfree) = NULL, *buf_sup __free(kfree) = NULL;
int ret;
- size_t len;
struct device_link *link = to_devlink(dev);
struct device *sup = link->supplier;
struct device *con = link->consumer;
- char *buf;
-
- len = max(strlen(dev_bus_name(sup)) + strlen(dev_name(sup)),
- strlen(dev_bus_name(con)) + strlen(dev_name(con)));
- len += strlen(":");
- len += strlen("supplier:") + 1;
- buf = kzalloc(len, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
ret = sysfs_create_link(&link->link_dev.kobj, &sup->kobj, "supplier");
if (ret)
@@ -586,58 +575,64 @@ static int devlink_add_symlinks(struct device *dev)
if (ret)
goto err_con;
- snprintf(buf, len, "consumer:%s:%s", dev_bus_name(con), dev_name(con));
- ret = sysfs_create_link(&sup->kobj, &link->link_dev.kobj, buf);
+ buf_con = kasprintf(GFP_KERNEL, "consumer:%s:%s", dev_bus_name(con), dev_name(con));
+ if (!buf_con) {
+ ret = -ENOMEM;
+ goto err_con_dev;
+ }
+
+ ret = sysfs_create_link(&sup->kobj, &link->link_dev.kobj, buf_con);
if (ret)
goto err_con_dev;
- snprintf(buf, len, "supplier:%s:%s", dev_bus_name(sup), dev_name(sup));
- ret = sysfs_create_link(&con->kobj, &link->link_dev.kobj, buf);
+ buf_sup = kasprintf(GFP_KERNEL, "supplier:%s:%s", dev_bus_name(sup), dev_name(sup));
+ if (!buf_sup) {
+ ret = -ENOMEM;
+ goto err_sup_dev;
+ }
+
+ ret = sysfs_create_link(&con->kobj, &link->link_dev.kobj, buf_sup);
if (ret)
goto err_sup_dev;
goto out;
err_sup_dev:
- snprintf(buf, len, "consumer:%s:%s", dev_bus_name(con), dev_name(con));
- sysfs_remove_link(&sup->kobj, buf);
+ sysfs_remove_link(&sup->kobj, buf_con);
err_con_dev:
sysfs_remove_link(&link->link_dev.kobj, "consumer");
err_con:
sysfs_remove_link(&link->link_dev.kobj, "supplier");
out:
- kfree(buf);
return ret;
}
static void devlink_remove_symlinks(struct device *dev)
{
+ char *buf_con __free(kfree) = NULL, *buf_sup __free(kfree) = NULL;
struct device_link *link = to_devlink(dev);
- size_t len;
struct device *sup = link->supplier;
struct device *con = link->consumer;
- char *buf;
sysfs_remove_link(&link->link_dev.kobj, "consumer");
sysfs_remove_link(&link->link_dev.kobj, "supplier");
- len = max(strlen(dev_bus_name(sup)) + strlen(dev_name(sup)),
- strlen(dev_bus_name(con)) + strlen(dev_name(con)));
- len += strlen(":");
- len += strlen("supplier:") + 1;
- buf = kzalloc(len, GFP_KERNEL);
- if (!buf) {
- WARN(1, "Unable to properly free device link symlinks!\n");
- return;
- }
-
if (device_is_registered(con)) {
- snprintf(buf, len, "supplier:%s:%s", dev_bus_name(sup), dev_name(sup));
- sysfs_remove_link(&con->kobj, buf);
+ buf_sup = kasprintf(GFP_KERNEL, "supplier:%s:%s", dev_bus_name(sup), dev_name(sup));
+ if (!buf_sup)
+ goto out;
+ sysfs_remove_link(&con->kobj, buf_sup);
}
- snprintf(buf, len, "consumer:%s:%s", dev_bus_name(con), dev_name(con));
- sysfs_remove_link(&sup->kobj, buf);
- kfree(buf);
+
+ buf_con = kasprintf(GFP_KERNEL, "consumer:%s:%s", dev_bus_name(con), dev_name(con));
+ if (!buf_con)
+ goto out;
+ sysfs_remove_link(&sup->kobj, buf_con);
+
+ return;
+
+out:
+ WARN(1, "Unable to properly free device link symlinks!\n");
}
static struct class_interface devlink_class_intf = {
@@ -678,6 +673,9 @@ postcore_initcall(devlink_class_init);
* @supplier: Supplier end of the link.
* @flags: Link flags.
*
+ * Return: On success, a device_link struct will be returned.
+ * On error or invalid flag settings, NULL will be returned.
+ *
* The caller is responsible for the proper synchronization of the link creation
* with runtime PM. First, setting the DL_FLAG_PM_RUNTIME flag will cause the
* runtime PM framework to take the link into account. Second, if the
@@ -1061,20 +1059,16 @@ int device_links_check_suppliers(struct device *dev)
* Device waiting for supplier to become available is not allowed to
* probe.
*/
- mutex_lock(&fwnode_link_lock);
- sup_fw = fwnode_links_check_suppliers(dev->fwnode);
- if (sup_fw) {
- if (!dev_is_best_effort(dev)) {
- fwnode_ret = -EPROBE_DEFER;
- dev_err_probe(dev, -EPROBE_DEFER,
- "wait for supplier %pfwf\n", sup_fw);
- } else {
- fwnode_ret = -EAGAIN;
+ scoped_guard(mutex, &fwnode_link_lock) {
+ sup_fw = fwnode_links_check_suppliers(dev->fwnode);
+ if (sup_fw) {
+ if (dev_is_best_effort(dev))
+ fwnode_ret = -EAGAIN;
+ else
+ return dev_err_probe(dev, -EPROBE_DEFER,
+ "wait for supplier %pfwf\n", sup_fw);
}
}
- mutex_unlock(&fwnode_link_lock);
- if (fwnode_ret == -EPROBE_DEFER)
- return fwnode_ret;
device_links_write_lock();
@@ -1093,10 +1087,8 @@ int device_links_check_suppliers(struct device *dev)
}
device_links_missing_supplier(dev);
- dev_err_probe(dev, -EPROBE_DEFER,
- "supplier %s not ready\n",
- dev_name(link->supplier));
- ret = -EPROBE_DEFER;
+ ret = dev_err_probe(dev, -EPROBE_DEFER,
+ "supplier %s not ready\n", dev_name(link->supplier));
break;
}
WRITE_ONCE(link->status, DL_STATE_CONSUMER_PROBE);
@@ -1249,9 +1241,8 @@ static ssize_t waiting_for_supplier_show(struct device *dev,
bool val;
device_lock(dev);
- mutex_lock(&fwnode_link_lock);
- val = !!fwnode_links_check_suppliers(dev->fwnode);
- mutex_unlock(&fwnode_link_lock);
+ scoped_guard(mutex, &fwnode_link_lock)
+ val = !!fwnode_links_check_suppliers(dev->fwnode);
device_unlock(dev);
return sysfs_emit(buf, "%u\n", val);
}
@@ -1324,13 +1315,15 @@ void device_links_driver_bound(struct device *dev)
*/
if (dev->fwnode && dev->fwnode->dev == dev) {
struct fwnode_handle *child;
+
fwnode_links_purge_suppliers(dev->fwnode);
- mutex_lock(&fwnode_link_lock);
+
+ guard(mutex)(&fwnode_link_lock);
+
fwnode_for_each_available_child_node(dev->fwnode, child)
__fw_devlink_pickup_dangling_consumers(child,
dev->fwnode);
__fw_devlink_link_to_consumers(dev);
- mutex_unlock(&fwnode_link_lock);
}
device_remove_file(dev, &dev_attr_waiting_for_supplier);
@@ -2339,10 +2332,10 @@ static void fw_devlink_link_device(struct device *dev)
fw_devlink_parse_fwtree(fwnode);
- mutex_lock(&fwnode_link_lock);
+ guard(mutex)(&fwnode_link_lock);
+
__fw_devlink_link_to_consumers(dev);
__fw_devlink_link_to_suppliers(dev, fwnode);
- mutex_unlock(&fwnode_link_lock);
}
/* Device links support end. */
@@ -2591,7 +2584,7 @@ static const void *device_namespace(const struct kobject *kobj)
const struct device *dev = kobj_to_dev(kobj);
const void *ns = NULL;
- if (dev->class && dev->class->ns_type)
+ if (dev->class && dev->class->namespace)
ns = dev->class->namespace(dev);
return ns;
@@ -3170,7 +3163,7 @@ void device_initialize(struct device *dev)
}
EXPORT_SYMBOL_GPL(device_initialize);
-struct kobject *virtual_device_parent(struct device *dev)
+struct kobject *virtual_device_parent(void)
{
static struct kobject *virtual_dir = NULL;
@@ -3248,7 +3241,7 @@ static struct kobject *get_device_parent(struct device *dev,
* in a "glue" directory to prevent namespace collisions.
*/
if (parent == NULL)
- parent_kobj = virtual_device_parent(dev);
+ parent_kobj = virtual_device_parent();
else if (parent->class && !dev->class->ns_type) {
subsys_put(sp);
return &parent->kobj;
@@ -4003,7 +3996,7 @@ int device_for_each_child(struct device *parent, void *data,
struct device *child;
int error = 0;
- if (!parent->p)
+ if (!parent || !parent->p)
return 0;
klist_iter_init(&parent->p->klist_children, &i);
@@ -4033,7 +4026,7 @@ int device_for_each_child_reverse(struct device *parent, void *data,
struct device *child;
int error = 0;
- if (!parent->p)
+ if (!parent || !parent->p)
return 0;
klist_iter_init(&parent->p->klist_children, &i);
@@ -4067,7 +4060,7 @@ struct device *device_find_child(struct device *parent, void *data,
struct klist_iter i;
struct device *child;
- if (!parent)
+ if (!parent || !parent->p)
return NULL;
klist_iter_init(&parent->p->klist_children, &i);
@@ -4515,9 +4508,11 @@ EXPORT_SYMBOL_GPL(device_destroy);
*/
int device_rename(struct device *dev, const char *new_name)
{
+ struct subsys_private *sp = NULL;
struct kobject *kobj = &dev->kobj;
char *old_device_name = NULL;
int error;
+ bool is_link_renamed = false;
dev = get_device(dev);
if (!dev)
@@ -4532,7 +4527,7 @@ int device_rename(struct device *dev, const char *new_name)
}
if (dev->class) {
- struct subsys_private *sp = class_to_subsys(dev->class);
+ sp = class_to_subsys(dev->class);
if (!sp) {
error = -EINVAL;
@@ -4541,16 +4536,19 @@ int device_rename(struct device *dev, const char *new_name)
error = sysfs_rename_link_ns(&sp->subsys.kobj, kobj, old_device_name,
new_name, kobject_namespace(kobj));
- subsys_put(sp);
if (error)
goto out;
+
+ is_link_renamed = true;
}
error = kobject_rename(kobj, new_name);
- if (error)
- goto out;
-
out:
+ if (error && is_link_renamed)
+ sysfs_rename_link_ns(&sp->subsys.kobj, kobj, new_name,
+ old_device_name, kobject_namespace(kobj));
+ subsys_put(sp);
+
put_device(dev);
kfree(old_device_name);
@@ -4872,7 +4870,7 @@ set_dev_info(const struct device *dev, struct dev_printk_info *dev_info)
else
return;
- strscpy(dev_info->subsystem, subsys, sizeof(dev_info->subsystem));
+ strscpy(dev_info->subsystem, subsys);
/*
* Add device identifier DEVICE=:
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index 964111361497..f0e4b4aba885 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -248,7 +248,7 @@ static int deferred_devs_show(struct seq_file *s, void *data)
list_for_each_entry(curr, &deferred_probe_pending_list, deferred_probe)
seq_printf(s, "%s\t%s", dev_name(curr->device),
- curr->device->p->deferred_probe_reason ?: "\n");
+ curr->deferred_probe_reason ?: "\n");
mutex_unlock(&deferred_probe_mutex);
diff --git a/drivers/base/devres.c b/drivers/base/devres.c
index a2ce0ead06a6..2152eec0c135 100644
--- a/drivers/base/devres.c
+++ b/drivers/base/devres.c
@@ -1231,6 +1231,6 @@ void devm_free_percpu(struct device *dev, void __percpu *pdata)
* devm_free_pages() does.
*/
WARN_ON(devres_release(dev, devm_percpu_release, devm_percpu_match,
- (__force void *)pdata));
+ (void *)(__force unsigned long)pdata));
}
EXPORT_SYMBOL_GPL(devm_free_percpu);
diff --git a/drivers/base/driver.c b/drivers/base/driver.c
index 88c6fd1f1992..b4eb5b89c4ee 100644
--- a/drivers/base/driver.c
+++ b/drivers/base/driver.c
@@ -150,7 +150,7 @@ EXPORT_SYMBOL_GPL(driver_for_each_device);
*/
struct device *driver_find_device(const struct device_driver *drv,
struct device *start, const void *data,
- int (*match)(struct device *dev, const void *data))
+ device_match_t match)
{
struct klist_iter i;
struct device *dev;
diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c
index a03ee4b11134..324a9a3c087a 100644
--- a/drivers/base/firmware_loader/main.c
+++ b/drivers/base/firmware_loader/main.c
@@ -849,6 +849,26 @@ static void fw_log_firmware_info(const struct firmware *fw, const char *name,
{}
#endif
+/*
+ * Reject firmware file names with ".." path components.
+ * There are drivers that construct firmware file names from device-supplied
+ * strings, and we don't want some device to be able to tell us "I would like to
+ * be sent my firmware from ../../../etc/shadow, please".
+ *
+ * Search for ".." surrounded by either '/' or start/end of string.
+ *
+ * This intentionally only looks at the firmware name, not at the firmware base
+ * directory or at symlink contents.
+ */
+static bool name_contains_dotdot(const char *name)
+{
+ size_t name_len = strlen(name);
+
+ return strcmp(name, "..") == 0 || strncmp(name, "../", 3) == 0 ||
+ strstr(name, "/../") != NULL ||
+ (name_len >= 3 && strcmp(name+name_len-3, "/..") == 0);
+}
+
/* called from request_firmware() and request_firmware_work_func() */
static int
_request_firmware(const struct firmware **firmware_p, const char *name,
@@ -869,6 +889,14 @@ _request_firmware(const struct firmware **firmware_p, const char *name,
goto out;
}
+ if (name_contains_dotdot(name)) {
+ dev_warn(device,
+ "Firmware load for '%s' refused, path contains '..' component\n",
+ name);
+ ret = -EINVAL;
+ goto out;
+ }
+
ret = _request_firmware_prepare(&fw, name, device, buf, size,
offset, opt_flags);
if (ret <= 0) /* error or already assigned */
@@ -946,6 +974,8 @@ out:
* @name will be used as $FIRMWARE in the uevent environment and
* should be distinctive enough not to be confused with any other
* firmware image for this or any other device.
+ * It must not contain any ".." path components - "foo/bar..bin" is
+ * allowed, but "foo/../bar.bin" is not.
*
* Caller must hold the reference count of @device.
*
diff --git a/drivers/base/module.c b/drivers/base/module.c
index f742ad2a21da..c4eaa1158d54 100644
--- a/drivers/base/module.c
+++ b/drivers/base/module.c
@@ -66,27 +66,31 @@ int module_add_driver(struct module *mod, const struct device_driver *drv)
driver_name = make_driver_name(drv);
if (!driver_name) {
ret = -ENOMEM;
- goto out;
+ goto out_remove_kobj;
}
module_create_drivers_dir(mk);
if (!mk->drivers_dir) {
ret = -EINVAL;
- goto out;
+ goto out_free_driver_name;
}
ret = sysfs_create_link(mk->drivers_dir, &drv->p->kobj, driver_name);
if (ret)
- goto out;
+ goto out_remove_drivers_dir;
kfree(driver_name);
return 0;
-out:
- sysfs_remove_link(&drv->p->kobj, "module");
+
+out_remove_drivers_dir:
sysfs_remove_link(mk->drivers_dir, driver_name);
+
+out_free_driver_name:
kfree(driver_name);
+out_remove_kobj:
+ sysfs_remove_link(&drv->p->kobj, "module");
return ret;
}
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 4c3ee6521ba5..6f2a33722c52 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -1474,7 +1474,7 @@ static const struct dev_pm_ops platform_dev_pm_ops = {
USE_PLATFORM_PM_SLEEP_OPS
};
-struct bus_type platform_bus_type = {
+const struct bus_type platform_bus_type = {
.name = "platform",
.dev_groups = platform_dev_groups,
.match = platform_match,
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 11901f2812ad..223faa9d5ffd 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -2259,14 +2259,12 @@ static const struct file_operations mtip_regs_fops = {
.owner = THIS_MODULE,
.open = simple_open,
.read = mtip_hw_read_registers,
- .llseek = no_llseek,
};
static const struct file_operations mtip_flags_fops = {
.owner = THIS_MODULE,
.open = simple_open,
.read = mtip_hw_read_flags,
- .llseek = no_llseek,
};
static void mtip_hw_debugfs_init(struct driver_data *dd)
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 3edb37a41312..499c110465e3 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2835,7 +2835,6 @@ static const struct file_operations pkt_ctl_fops = {
.compat_ioctl = pkt_ctl_compat_ioctl,
#endif
.owner = THIS_MODULE,
- .llseek = no_llseek,
};
static struct miscdevice pkt_misc = {
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index bca06bfb4bc3..a6c8e5cc6051 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -1983,7 +1983,6 @@ static const struct file_operations ublk_ch_fops = {
.owner = THIS_MODULE,
.open = ublk_ch_open,
.release = ublk_ch_release,
- .llseek = no_llseek,
.read_iter = ublk_ch_read_iter,
.write_iter = ublk_ch_write_iter,
.uring_cmd = ublk_ch_uring_cmd,
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index c3d245617083..ad9c9bc3ccfc 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -2115,8 +2115,10 @@ static void zram_destroy_comps(struct zram *zram)
zram->num_active_comps--;
}
- for (prio = ZRAM_SECONDARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
- kfree(zram->comp_algs[prio]);
+ for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
+ /* Do not free statically defined compression algorithms */
+ if (zram->comp_algs[prio] != default_compressor)
+ kfree(zram->comp_algs[prio]);
zram->comp_algs[prio] = NULL;
}
diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c
index 43e9ac5a3324..aa6af351d02d 100644
--- a/drivers/bluetooth/hci_vhci.c
+++ b/drivers/bluetooth/hci_vhci.c
@@ -679,7 +679,6 @@ static const struct file_operations vhci_fops = {
.poll = vhci_poll,
.open = vhci_open,
.release = vhci_release,
- .llseek = no_llseek,
};
static struct miscdevice vhci_miscdev = {
diff --git a/drivers/bus/fsl-mc/fsl-mc-bus.c b/drivers/bus/fsl-mc/fsl-mc-bus.c
index dd68b8191a0a..930d8a3ba722 100644
--- a/drivers/bus/fsl-mc/fsl-mc-bus.c
+++ b/drivers/bus/fsl-mc/fsl-mc-bus.c
@@ -309,7 +309,7 @@ static struct attribute *fsl_mc_bus_attrs[] = {
ATTRIBUTE_GROUPS(fsl_mc_bus);
-struct bus_type fsl_mc_bus_type = {
+const struct bus_type fsl_mc_bus_type = {
.name = "fsl-mc",
.match = fsl_mc_bus_match,
.uevent = fsl_mc_bus_uevent,
diff --git a/drivers/bus/moxtet.c b/drivers/bus/moxtet.c
index 8412406c4f1d..6276551d7968 100644
--- a/drivers/bus/moxtet.c
+++ b/drivers/bus/moxtet.c
@@ -484,7 +484,6 @@ static const struct file_operations input_fops = {
.owner = THIS_MODULE,
.open = moxtet_debug_open,
.read = input_read,
- .llseek = no_llseek,
};
static ssize_t output_read(struct file *file, char __user *buf, size_t len,
@@ -549,7 +548,6 @@ static const struct file_operations output_fops = {
.open = moxtet_debug_open,
.read = output_read,
.write = output_write,
- .llseek = no_llseek,
};
static int moxtet_register_debugfs(struct moxtet *moxtet)
diff --git a/drivers/char/applicom.c b/drivers/char/applicom.c
index 69314532f38c..9fed9706d9cd 100644
--- a/drivers/char/applicom.c
+++ b/drivers/char/applicom.c
@@ -111,7 +111,6 @@ static irqreturn_t ac_interrupt(int, void *);
static const struct file_operations ac_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.read = ac_read,
.write = ac_write,
.unlocked_ioctl = ac_ioctl,
diff --git a/drivers/char/ds1620.c b/drivers/char/ds1620.c
index a4f4291b4492..44a1cdbd4bfb 100644
--- a/drivers/char/ds1620.c
+++ b/drivers/char/ds1620.c
@@ -353,7 +353,6 @@ static const struct file_operations ds1620_fops = {
.open = ds1620_open,
.read = ds1620_read,
.unlocked_ioctl = ds1620_unlocked_ioctl,
- .llseek = no_llseek,
};
static struct miscdevice ds1620_miscdev = {
diff --git a/drivers/char/dtlk.c b/drivers/char/dtlk.c
index 5a1a73310e97..27f5f9d19531 100644
--- a/drivers/char/dtlk.c
+++ b/drivers/char/dtlk.c
@@ -107,7 +107,6 @@ static const struct file_operations dtlk_fops =
.unlocked_ioctl = dtlk_ioctl,
.open = dtlk_open,
.release = dtlk_release,
- .llseek = no_llseek,
};
/* local prototypes */
diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c
index 3dadc4accee3..e904e476e49a 100644
--- a/drivers/char/hpet.c
+++ b/drivers/char/hpet.c
@@ -700,7 +700,6 @@ hpet_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
static const struct file_operations hpet_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.read = hpet_read,
.poll = hpet_poll,
.unlocked_ioctl = hpet_ioctl,
diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c
index 9a459257489f..335eea80054e 100644
--- a/drivers/char/ipmi/ipmi_watchdog.c
+++ b/drivers/char/ipmi/ipmi_watchdog.c
@@ -903,7 +903,6 @@ static const struct file_operations ipmi_wdog_fops = {
.open = ipmi_open,
.release = ipmi_close,
.fasync = ipmi_fasync,
- .llseek = no_llseek,
};
static struct miscdevice ipmi_wdog_miscdev = {
diff --git a/drivers/char/pc8736x_gpio.c b/drivers/char/pc8736x_gpio.c
index c39a836ebd15..5f4696813cea 100644
--- a/drivers/char/pc8736x_gpio.c
+++ b/drivers/char/pc8736x_gpio.c
@@ -235,7 +235,6 @@ static const struct file_operations pc8736x_gpio_fileops = {
.open = pc8736x_gpio_open,
.write = nsc_gpio_write,
.read = nsc_gpio_read,
- .llseek = no_llseek,
};
static void __init pc8736x_init_shadow(void)
diff --git a/drivers/char/ppdev.c b/drivers/char/ppdev.c
index eaff98dbaa8c..d1dfbd8d4d42 100644
--- a/drivers/char/ppdev.c
+++ b/drivers/char/ppdev.c
@@ -786,7 +786,6 @@ static const struct class ppdev_class = {
static const struct file_operations pp_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.read = pp_read,
.write = pp_write,
.poll = pp_poll,
diff --git a/drivers/char/scx200_gpio.c b/drivers/char/scx200_gpio.c
index 9f701dcba95c..700e6affea6f 100644
--- a/drivers/char/scx200_gpio.c
+++ b/drivers/char/scx200_gpio.c
@@ -68,7 +68,6 @@ static const struct file_operations scx200_gpio_fileops = {
.read = nsc_gpio_read,
.open = scx200_gpio_open,
.release = scx200_gpio_release,
- .llseek = no_llseek,
};
static struct cdev scx200_gpio_cdev; /* use 1 cdev for all pins */
diff --git a/drivers/char/sonypi.c b/drivers/char/sonypi.c
index bb5115b1736a..0f8185e541ed 100644
--- a/drivers/char/sonypi.c
+++ b/drivers/char/sonypi.c
@@ -1054,7 +1054,6 @@ static const struct file_operations sonypi_misc_fops = {
.release = sonypi_misc_release,
.fasync = sonypi_misc_fasync,
.unlocked_ioctl = sonypi_misc_ioctl,
- .llseek = no_llseek,
};
static struct miscdevice sonypi_misc_device = {
diff --git a/drivers/char/tpm/tpm-dev.c b/drivers/char/tpm/tpm-dev.c
index e2c0baa69fef..97c94b5e9340 100644
--- a/drivers/char/tpm/tpm-dev.c
+++ b/drivers/char/tpm/tpm-dev.c
@@ -59,7 +59,6 @@ static int tpm_release(struct inode *inode, struct file *file)
const struct file_operations tpm_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.open = tpm_open,
.read = tpm_common_read,
.write = tpm_common_write,
diff --git a/drivers/char/tpm/tpm_vtpm_proxy.c b/drivers/char/tpm/tpm_vtpm_proxy.c
index 11c502039faf..8fe4a01eea12 100644
--- a/drivers/char/tpm/tpm_vtpm_proxy.c
+++ b/drivers/char/tpm/tpm_vtpm_proxy.c
@@ -243,7 +243,6 @@ static int vtpm_proxy_fops_release(struct inode *inode, struct file *filp)
static const struct file_operations vtpm_proxy_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.read = vtpm_proxy_fops_read,
.write = vtpm_proxy_fops_write,
.poll = vtpm_proxy_fops_poll,
diff --git a/drivers/char/tpm/tpmrm-dev.c b/drivers/char/tpm/tpmrm-dev.c
index eef0fb06ea83..c25df7ea064e 100644
--- a/drivers/char/tpm/tpmrm-dev.c
+++ b/drivers/char/tpm/tpmrm-dev.c
@@ -46,7 +46,6 @@ static int tpmrm_release(struct inode *inode, struct file *file)
const struct file_operations tpmrm_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.open = tpmrm_open,
.read = tpm_common_read,
.write = tpm_common_write,
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index de7d720d99fa..99a7f2441e70 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -1093,7 +1093,6 @@ static const struct file_operations port_fops = {
.poll = port_fops_poll,
.release = port_fops_release,
.fasync = port_fops_fasync,
- .llseek = no_llseek,
};
/*
diff --git a/drivers/counter/counter-chrdev.c b/drivers/counter/counter-chrdev.c
index afc94d0062b1..3ee75e1a78cd 100644
--- a/drivers/counter/counter-chrdev.c
+++ b/drivers/counter/counter-chrdev.c
@@ -454,7 +454,6 @@ out_unlock:
static const struct file_operations counter_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.read = counter_chrdev_read,
.poll = counter_chrdev_poll,
.unlocked_ioctl = counter_chrdev_ioctl,
diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c
index bb83867d9fec..ef1621d40f05 100644
--- a/drivers/cxl/core/cdat.c
+++ b/drivers/cxl/core/cdat.c
@@ -9,13 +9,12 @@
#include "cxlmem.h"
#include "core.h"
#include "cxl.h"
-#include "core.h"
struct dsmas_entry {
struct range dpa_range;
u8 handle;
struct access_coordinate coord[ACCESS_COORDINATE_MAX];
-
+ struct access_coordinate cdat_coord[ACCESS_COORDINATE_MAX];
int entries;
int qos_class;
};
@@ -163,7 +162,7 @@ static int cdat_dslbis_handler(union acpi_subtable_headers *header, void *arg,
val = cdat_normalize(le16_to_cpu(le_val), le64_to_cpu(le_base),
dslbis->data_type);
- cxl_access_coordinate_set(dent->coord, dslbis->data_type, val);
+ cxl_access_coordinate_set(dent->cdat_coord, dslbis->data_type, val);
return 0;
}
@@ -220,7 +219,7 @@ static int cxl_port_perf_data_calculate(struct cxl_port *port,
xa_for_each(dsmas_xa, index, dent) {
int qos_class;
- cxl_coordinates_combine(dent->coord, dent->coord, ep_c);
+ cxl_coordinates_combine(dent->coord, dent->cdat_coord, ep_c);
dent->entries = 1;
rc = cxl_root->ops->qos_class(cxl_root,
&dent->coord[ACCESS_COORDINATE_CPU],
@@ -241,8 +240,10 @@ static int cxl_port_perf_data_calculate(struct cxl_port *port,
static void update_perf_entry(struct device *dev, struct dsmas_entry *dent,
struct cxl_dpa_perf *dpa_perf)
{
- for (int i = 0; i < ACCESS_COORDINATE_MAX; i++)
+ for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
dpa_perf->coord[i] = dent->coord[i];
+ dpa_perf->cdat_coord[i] = dent->cdat_coord[i];
+ }
dpa_perf->dpa_range = dent->dpa_range;
dpa_perf->qos_class = dent->qos_class;
dev_dbg(dev,
@@ -546,19 +547,37 @@ void cxl_coordinates_combine(struct access_coordinate *out,
MODULE_IMPORT_NS(CXL);
-void cxl_region_perf_data_calculate(struct cxl_region *cxlr,
- struct cxl_endpoint_decoder *cxled)
+static void cxl_bandwidth_add(struct access_coordinate *coord,
+ struct access_coordinate *c1,
+ struct access_coordinate *c2)
+{
+ for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
+ coord[i].read_bandwidth = c1[i].read_bandwidth +
+ c2[i].read_bandwidth;
+ coord[i].write_bandwidth = c1[i].write_bandwidth +
+ c2[i].write_bandwidth;
+ }
+}
+
+static bool dpa_perf_contains(struct cxl_dpa_perf *perf,
+ struct resource *dpa_res)
{
- struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
- struct cxl_dev_state *cxlds = cxlmd->cxlds;
- struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
struct range dpa = {
- .start = cxled->dpa_res->start,
- .end = cxled->dpa_res->end,
+ .start = dpa_res->start,
+ .end = dpa_res->end,
};
+
+ return range_contains(&perf->dpa_range, &dpa);
+}
+
+static struct cxl_dpa_perf *cxled_get_dpa_perf(struct cxl_endpoint_decoder *cxled,
+ enum cxl_decoder_mode mode)
+{
+ struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+ struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
struct cxl_dpa_perf *perf;
- switch (cxlr->mode) {
+ switch (mode) {
case CXL_DECODER_RAM:
perf = &mds->ram_perf;
break;
@@ -566,12 +585,473 @@ void cxl_region_perf_data_calculate(struct cxl_region *cxlr,
perf = &mds->pmem_perf;
break;
default:
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (!dpa_perf_contains(perf, cxled->dpa_res))
+ return ERR_PTR(-EINVAL);
+
+ return perf;
+}
+
+/*
+ * Transient context for containing the current calculation of bandwidth when
+ * doing walking the port hierarchy to deal with shared upstream link.
+ */
+struct cxl_perf_ctx {
+ struct access_coordinate coord[ACCESS_COORDINATE_MAX];
+ struct cxl_port *port;
+};
+
+/**
+ * cxl_endpoint_gather_bandwidth - collect all the endpoint bandwidth in an xarray
+ * @cxlr: CXL region for the bandwidth calculation
+ * @cxled: endpoint decoder to start on
+ * @usp_xa: (output) the xarray that collects all the bandwidth coordinates
+ * indexed by the upstream device with data of 'struct cxl_perf_ctx'.
+ * @gp_is_root: (output) bool of whether the grandparent is cxl root.
+ *
+ * Return: 0 for success or -errno
+ *
+ * Collects aggregated endpoint bandwidth and store the bandwidth in
+ * an xarray indexed by the upstream device of the switch or the RP
+ * device. Each endpoint consists the minimum of the bandwidth from DSLBIS
+ * from the endpoint CDAT, the endpoint upstream link bandwidth, and the
+ * bandwidth from the SSLBIS of the switch CDAT for the switch upstream port to
+ * the downstream port that's associated with the endpoint. If the
+ * device is directly connected to a RP, then no SSLBIS is involved.
+ */
+static int cxl_endpoint_gather_bandwidth(struct cxl_region *cxlr,
+ struct cxl_endpoint_decoder *cxled,
+ struct xarray *usp_xa,
+ bool *gp_is_root)
+{
+ struct cxl_port *endpoint = to_cxl_port(cxled->cxld.dev.parent);
+ struct cxl_port *parent_port = to_cxl_port(endpoint->dev.parent);
+ struct cxl_port *gp_port = to_cxl_port(parent_port->dev.parent);
+ struct access_coordinate pci_coord[ACCESS_COORDINATE_MAX];
+ struct access_coordinate sw_coord[ACCESS_COORDINATE_MAX];
+ struct access_coordinate ep_coord[ACCESS_COORDINATE_MAX];
+ struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ struct pci_dev *pdev = to_pci_dev(cxlds->dev);
+ struct cxl_perf_ctx *perf_ctx;
+ struct cxl_dpa_perf *perf;
+ unsigned long index;
+ void *ptr;
+ int rc;
+
+ if (cxlds->rcd)
+ return -ENODEV;
+
+ perf = cxled_get_dpa_perf(cxled, cxlr->mode);
+ if (IS_ERR(perf))
+ return PTR_ERR(perf);
+
+ gp_port = to_cxl_port(parent_port->dev.parent);
+ *gp_is_root = is_cxl_root(gp_port);
+
+ /*
+ * If the grandparent is cxl root, then index is the root port,
+ * otherwise it's the parent switch upstream device.
+ */
+ if (*gp_is_root)
+ index = (unsigned long)endpoint->parent_dport->dport_dev;
+ else
+ index = (unsigned long)parent_port->uport_dev;
+
+ perf_ctx = xa_load(usp_xa, index);
+ if (!perf_ctx) {
+ struct cxl_perf_ctx *c __free(kfree) =
+ kzalloc(sizeof(*perf_ctx), GFP_KERNEL);
+
+ if (!c)
+ return -ENOMEM;
+ ptr = xa_store(usp_xa, index, c, GFP_KERNEL);
+ if (xa_is_err(ptr))
+ return xa_err(ptr);
+ perf_ctx = no_free_ptr(c);
+ perf_ctx->port = parent_port;
+ }
+
+ /* Direct upstream link from EP bandwidth */
+ rc = cxl_pci_get_bandwidth(pdev, pci_coord);
+ if (rc < 0)
+ return rc;
+
+ /*
+ * Min of upstream link bandwidth and Endpoint CDAT bandwidth from
+ * DSLBIS.
+ */
+ cxl_coordinates_combine(ep_coord, pci_coord, perf->cdat_coord);
+
+ /*
+ * If grandparent port is root, then there's no switch involved and
+ * the endpoint is connected to a root port.
+ */
+ if (!*gp_is_root) {
+ /*
+ * Retrieve the switch SSLBIS for switch downstream port
+ * associated with the endpoint bandwidth.
+ */
+ rc = cxl_port_get_switch_dport_bandwidth(endpoint, sw_coord);
+ if (rc)
+ return rc;
+
+ /*
+ * Min of the earlier coordinates with the switch SSLBIS
+ * bandwidth
+ */
+ cxl_coordinates_combine(ep_coord, ep_coord, sw_coord);
+ }
+
+ /*
+ * Aggregate the computed bandwidth with the current aggregated bandwidth
+ * of the endpoints with the same switch upstream device or RP.
+ */
+ cxl_bandwidth_add(perf_ctx->coord, perf_ctx->coord, ep_coord);
+
+ return 0;
+}
+
+static void free_perf_xa(struct xarray *xa)
+{
+ struct cxl_perf_ctx *ctx;
+ unsigned long index;
+
+ if (!xa)
return;
+
+ xa_for_each(xa, index, ctx)
+ kfree(ctx);
+ xa_destroy(xa);
+ kfree(xa);
+}
+DEFINE_FREE(free_perf_xa, struct xarray *, if (_T) free_perf_xa(_T))
+
+/**
+ * cxl_switch_gather_bandwidth - collect all the bandwidth at switch level in an xarray
+ * @cxlr: The region being operated on
+ * @input_xa: xarray indexed by upstream device of a switch with data of 'struct
+ * cxl_perf_ctx'
+ * @gp_is_root: (output) bool of whether the grandparent is cxl root.
+ *
+ * Return: a xarray of resulting cxl_perf_ctx per parent switch or root port
+ * or ERR_PTR(-errno)
+ *
+ * Iterate through the xarray. Take the minimum of the downstream calculated
+ * bandwidth, the upstream link bandwidth, and the SSLBIS of the upstream
+ * switch if exists. Sum the resulting bandwidth under the switch upstream
+ * device or a RP device. The function can be iterated over multiple switches
+ * if the switches are present.
+ */
+static struct xarray *cxl_switch_gather_bandwidth(struct cxl_region *cxlr,
+ struct xarray *input_xa,
+ bool *gp_is_root)
+{
+ struct xarray *res_xa __free(free_perf_xa) =
+ kzalloc(sizeof(*res_xa), GFP_KERNEL);
+ struct access_coordinate coords[ACCESS_COORDINATE_MAX];
+ struct cxl_perf_ctx *ctx, *us_ctx;
+ unsigned long index, us_index;
+ int dev_count = 0;
+ int gp_count = 0;
+ void *ptr;
+ int rc;
+
+ if (!res_xa)
+ return ERR_PTR(-ENOMEM);
+ xa_init(res_xa);
+
+ xa_for_each(input_xa, index, ctx) {
+ struct device *dev = (struct device *)index;
+ struct cxl_port *port = ctx->port;
+ struct cxl_port *parent_port = to_cxl_port(port->dev.parent);
+ struct cxl_port *gp_port = to_cxl_port(parent_port->dev.parent);
+ struct cxl_dport *dport = port->parent_dport;
+ bool is_root = false;
+
+ dev_count++;
+ if (is_cxl_root(gp_port)) {
+ is_root = true;
+ gp_count++;
+ }
+
+ /*
+ * If the grandparent is cxl root, then index is the root port,
+ * otherwise it's the parent switch upstream device.
+ */
+ if (is_root)
+ us_index = (unsigned long)port->parent_dport->dport_dev;
+ else
+ us_index = (unsigned long)parent_port->uport_dev;
+
+ us_ctx = xa_load(res_xa, us_index);
+ if (!us_ctx) {
+ struct cxl_perf_ctx *n __free(kfree) =
+ kzalloc(sizeof(*n), GFP_KERNEL);
+
+ if (!n)
+ return ERR_PTR(-ENOMEM);
+
+ ptr = xa_store(res_xa, us_index, n, GFP_KERNEL);
+ if (xa_is_err(ptr))
+ return ERR_PTR(xa_err(ptr));
+ us_ctx = no_free_ptr(n);
+ us_ctx->port = parent_port;
+ }
+
+ /*
+ * If the device isn't an upstream PCIe port, there's something
+ * wrong with the topology.
+ */
+ if (!dev_is_pci(dev))
+ return ERR_PTR(-EINVAL);
+
+ /* Retrieve the upstream link bandwidth */
+ rc = cxl_pci_get_bandwidth(to_pci_dev(dev), coords);
+ if (rc)
+ return ERR_PTR(-ENXIO);
+
+ /*
+ * Take the min of downstream bandwidth and the upstream link
+ * bandwidth.
+ */
+ cxl_coordinates_combine(coords, coords, ctx->coord);
+
+ /*
+ * Take the min of the calculated bandwdith and the upstream
+ * switch SSLBIS bandwidth if there's a parent switch
+ */
+ if (!is_root)
+ cxl_coordinates_combine(coords, coords, dport->coord);
+
+ /*
+ * Aggregate the calculated bandwidth common to an upstream
+ * switch.
+ */
+ cxl_bandwidth_add(us_ctx->coord, us_ctx->coord, coords);
}
+ /* Asymmetric topology detected. */
+ if (gp_count) {
+ if (gp_count != dev_count) {
+ dev_dbg(&cxlr->dev,
+ "Asymmetric hierarchy detected, bandwidth not updated\n");
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+ *gp_is_root = true;
+ }
+
+ return no_free_ptr(res_xa);
+}
+
+/**
+ * cxl_rp_gather_bandwidth - handle the root port level bandwidth collection
+ * @xa: the xarray that holds the cxl_perf_ctx that has the bandwidth calculated
+ * below each root port device.
+ *
+ * Return: xarray that holds cxl_perf_ctx per host bridge or ERR_PTR(-errno)
+ */
+static struct xarray *cxl_rp_gather_bandwidth(struct xarray *xa)
+{
+ struct xarray *hb_xa __free(free_perf_xa) =
+ kzalloc(sizeof(*hb_xa), GFP_KERNEL);
+ struct cxl_perf_ctx *ctx;
+ unsigned long index;
+
+ if (!hb_xa)
+ return ERR_PTR(-ENOMEM);
+ xa_init(hb_xa);
+
+ xa_for_each(xa, index, ctx) {
+ struct cxl_port *port = ctx->port;
+ unsigned long hb_index = (unsigned long)port->uport_dev;
+ struct cxl_perf_ctx *hb_ctx;
+ void *ptr;
+
+ hb_ctx = xa_load(hb_xa, hb_index);
+ if (!hb_ctx) {
+ struct cxl_perf_ctx *n __free(kfree) =
+ kzalloc(sizeof(*n), GFP_KERNEL);
+
+ if (!n)
+ return ERR_PTR(-ENOMEM);
+ ptr = xa_store(hb_xa, hb_index, n, GFP_KERNEL);
+ if (xa_is_err(ptr))
+ return ERR_PTR(xa_err(ptr));
+ hb_ctx = no_free_ptr(n);
+ hb_ctx->port = port;
+ }
+
+ cxl_bandwidth_add(hb_ctx->coord, hb_ctx->coord, ctx->coord);
+ }
+
+ return no_free_ptr(hb_xa);
+}
+
+/**
+ * cxl_hb_gather_bandwidth - handle the host bridge level bandwidth collection
+ * @xa: the xarray that holds the cxl_perf_ctx that has the bandwidth calculated
+ * below each host bridge.
+ *
+ * Return: xarray that holds cxl_perf_ctx per ACPI0017 device or ERR_PTR(-errno)
+ */
+static struct xarray *cxl_hb_gather_bandwidth(struct xarray *xa)
+{
+ struct xarray *mw_xa __free(free_perf_xa) =
+ kzalloc(sizeof(*mw_xa), GFP_KERNEL);
+ struct cxl_perf_ctx *ctx;
+ unsigned long index;
+
+ if (!mw_xa)
+ return ERR_PTR(-ENOMEM);
+ xa_init(mw_xa);
+
+ xa_for_each(xa, index, ctx) {
+ struct cxl_port *port = ctx->port;
+ struct cxl_port *parent_port;
+ struct cxl_perf_ctx *mw_ctx;
+ struct cxl_dport *dport;
+ unsigned long mw_index;
+ void *ptr;
+
+ parent_port = to_cxl_port(port->dev.parent);
+ mw_index = (unsigned long)parent_port->uport_dev;
+
+ mw_ctx = xa_load(mw_xa, mw_index);
+ if (!mw_ctx) {
+ struct cxl_perf_ctx *n __free(kfree) =
+ kzalloc(sizeof(*n), GFP_KERNEL);
+
+ if (!n)
+ return ERR_PTR(-ENOMEM);
+ ptr = xa_store(mw_xa, mw_index, n, GFP_KERNEL);
+ if (xa_is_err(ptr))
+ return ERR_PTR(xa_err(ptr));
+ mw_ctx = no_free_ptr(n);
+ }
+
+ dport = port->parent_dport;
+ cxl_coordinates_combine(ctx->coord, ctx->coord, dport->coord);
+ cxl_bandwidth_add(mw_ctx->coord, mw_ctx->coord, ctx->coord);
+ }
+
+ return no_free_ptr(mw_xa);
+}
+
+/**
+ * cxl_region_update_bandwidth - Update the bandwidth access coordinates of a region
+ * @cxlr: The region being operated on
+ * @input_xa: xarray holds cxl_perf_ctx wht calculated bandwidth per ACPI0017 instance
+ */
+static void cxl_region_update_bandwidth(struct cxl_region *cxlr,
+ struct xarray *input_xa)
+{
+ struct access_coordinate coord[ACCESS_COORDINATE_MAX];
+ struct cxl_perf_ctx *ctx;
+ unsigned long index;
+
+ memset(coord, 0, sizeof(coord));
+ xa_for_each(input_xa, index, ctx)
+ cxl_bandwidth_add(coord, coord, ctx->coord);
+
+ for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
+ cxlr->coord[i].read_bandwidth = coord[i].read_bandwidth;
+ cxlr->coord[i].write_bandwidth = coord[i].write_bandwidth;
+ }
+}
+
+/**
+ * cxl_region_shared_upstream_bandwidth_update - Recalculate the bandwidth for
+ * the region
+ * @cxlr: the cxl region to recalculate
+ *
+ * The function walks the topology from bottom up and calculates the bandwidth. It
+ * starts at the endpoints, processes at the switches if any, processes at the rootport
+ * level, at the host bridge level, and finally aggregates at the region.
+ */
+void cxl_region_shared_upstream_bandwidth_update(struct cxl_region *cxlr)
+{
+ struct xarray *working_xa;
+ int root_count = 0;
+ bool is_root;
+ int rc;
+
+ lockdep_assert_held(&cxl_dpa_rwsem);
+
+ struct xarray *usp_xa __free(free_perf_xa) =
+ kzalloc(sizeof(*usp_xa), GFP_KERNEL);
+
+ if (!usp_xa)
+ return;
+
+ xa_init(usp_xa);
+
+ /* Collect bandwidth data from all the endpoints. */
+ for (int i = 0; i < cxlr->params.nr_targets; i++) {
+ struct cxl_endpoint_decoder *cxled = cxlr->params.targets[i];
+
+ is_root = false;
+ rc = cxl_endpoint_gather_bandwidth(cxlr, cxled, usp_xa, &is_root);
+ if (rc)
+ return;
+ root_count += is_root;
+ }
+
+ /* Detect asymmetric hierarchy with some direct attached endpoints. */
+ if (root_count && root_count != cxlr->params.nr_targets) {
+ dev_dbg(&cxlr->dev,
+ "Asymmetric hierarchy detected, bandwidth not updated\n");
+ return;
+ }
+
+ /*
+ * Walk up one or more switches to deal with the bandwidth of the
+ * switches if they exist. Endpoints directly attached to RPs skip
+ * over this part.
+ */
+ if (!root_count) {
+ do {
+ working_xa = cxl_switch_gather_bandwidth(cxlr, usp_xa,
+ &is_root);
+ if (IS_ERR(working_xa))
+ return;
+ free_perf_xa(usp_xa);
+ usp_xa = working_xa;
+ } while (!is_root);
+ }
+
+ /* Handle the bandwidth at the root port of the hierarchy */
+ working_xa = cxl_rp_gather_bandwidth(usp_xa);
+ if (IS_ERR(working_xa))
+ return;
+ free_perf_xa(usp_xa);
+ usp_xa = working_xa;
+
+ /* Handle the bandwidth at the host bridge of the hierarchy */
+ working_xa = cxl_hb_gather_bandwidth(usp_xa);
+ if (IS_ERR(working_xa))
+ return;
+ free_perf_xa(usp_xa);
+ usp_xa = working_xa;
+
+ /*
+ * Aggregate all the bandwidth collected per CFMWS (ACPI0017) and
+ * update the region bandwidth with the final calculated values.
+ */
+ cxl_region_update_bandwidth(cxlr, usp_xa);
+}
+
+void cxl_region_perf_data_calculate(struct cxl_region *cxlr,
+ struct cxl_endpoint_decoder *cxled)
+{
+ struct cxl_dpa_perf *perf;
+
lockdep_assert_held(&cxl_dpa_rwsem);
- if (!range_contains(&perf->dpa_range, &dpa))
+ perf = cxled_get_dpa_perf(cxled, cxlr->mode);
+ if (IS_ERR(perf))
return;
for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index 72a506c9dbd0..0c62b4069ba0 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -103,9 +103,11 @@ enum cxl_poison_trace_type {
};
long cxl_pci_get_latency(struct pci_dev *pdev);
-
+int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c);
int cxl_update_hmat_access_coordinates(int nid, struct cxl_region *cxlr,
enum access_coordinate_class access);
bool cxl_need_node_perf_attrs_update(int nid);
+int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port,
+ struct access_coordinate *c);
#endif /* __CXL_CORE_H__ */
diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index e5cdeafdf76e..946f8e44455f 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -225,7 +225,7 @@ static const char *cxl_mem_opcode_to_name(u16 opcode)
/**
* cxl_internal_send_cmd() - Kernel internal interface to send a mailbox command
- * @mds: The driver data for the operation
+ * @cxl_mbox: CXL mailbox context
* @mbox_cmd: initialized command to execute
*
* Context: Any context.
@@ -241,19 +241,19 @@ static const char *cxl_mem_opcode_to_name(u16 opcode)
* error. While this distinction can be useful for commands from userspace, the
* kernel will only be able to use results when both are successful.
*/
-int cxl_internal_send_cmd(struct cxl_memdev_state *mds,
+int cxl_internal_send_cmd(struct cxl_mailbox *cxl_mbox,
struct cxl_mbox_cmd *mbox_cmd)
{
size_t out_size, min_out;
int rc;
- if (mbox_cmd->size_in > mds->payload_size ||
- mbox_cmd->size_out > mds->payload_size)
+ if (mbox_cmd->size_in > cxl_mbox->payload_size ||
+ mbox_cmd->size_out > cxl_mbox->payload_size)
return -E2BIG;
out_size = mbox_cmd->size_out;
min_out = mbox_cmd->min_out;
- rc = mds->mbox_send(mds, mbox_cmd);
+ rc = cxl_mbox->mbox_send(cxl_mbox, mbox_cmd);
/*
* EIO is reserved for a payload size mismatch and mbox_send()
* may not return this error.
@@ -353,6 +353,7 @@ static int cxl_mbox_cmd_ctor(struct cxl_mbox_cmd *mbox,
struct cxl_memdev_state *mds, u16 opcode,
size_t in_size, size_t out_size, u64 in_payload)
{
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
*mbox = (struct cxl_mbox_cmd) {
.opcode = opcode,
.size_in = in_size,
@@ -374,7 +375,7 @@ static int cxl_mbox_cmd_ctor(struct cxl_mbox_cmd *mbox,
/* Prepare to handle a full payload for variable sized output */
if (out_size == CXL_VARIABLE_PAYLOAD)
- mbox->size_out = mds->payload_size;
+ mbox->size_out = cxl_mbox->payload_size;
else
mbox->size_out = out_size;
@@ -398,6 +399,8 @@ static int cxl_to_mem_cmd_raw(struct cxl_mem_command *mem_cmd,
const struct cxl_send_command *send_cmd,
struct cxl_memdev_state *mds)
{
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
+
if (send_cmd->raw.rsvd)
return -EINVAL;
@@ -406,7 +409,7 @@ static int cxl_to_mem_cmd_raw(struct cxl_mem_command *mem_cmd,
* gets passed along without further checking, so it must be
* validated here.
*/
- if (send_cmd->out.size > mds->payload_size)
+ if (send_cmd->out.size > cxl_mbox->payload_size)
return -EINVAL;
if (!cxl_mem_raw_command_allowed(send_cmd->raw.opcode))
@@ -494,6 +497,7 @@ static int cxl_validate_cmd_from_user(struct cxl_mbox_cmd *mbox_cmd,
struct cxl_memdev_state *mds,
const struct cxl_send_command *send_cmd)
{
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
struct cxl_mem_command mem_cmd;
int rc;
@@ -505,7 +509,7 @@ static int cxl_validate_cmd_from_user(struct cxl_mbox_cmd *mbox_cmd,
* supports, but output can be arbitrarily large (simply write out as
* much data as the hardware provides).
*/
- if (send_cmd->in.size > mds->payload_size)
+ if (send_cmd->in.size > cxl_mbox->payload_size)
return -EINVAL;
/* Sanitize and construct a cxl_mem_command */
@@ -542,7 +546,7 @@ int cxl_query_cmd(struct cxl_memdev *cxlmd,
return put_user(ARRAY_SIZE(cxl_mem_commands), &q->n_commands);
/*
- * otherwise, return max(n_commands, total commands) cxl_command_info
+ * otherwise, return min(n_commands, total commands) cxl_command_info
* structures.
*/
cxl_for_each_cmd(cmd) {
@@ -591,6 +595,7 @@ static int handle_mailbox_cmd_from_user(struct cxl_memdev_state *mds,
u64 out_payload, s32 *size_out,
u32 *retval)
{
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
struct device *dev = mds->cxlds.dev;
int rc;
@@ -601,7 +606,7 @@ static int handle_mailbox_cmd_from_user(struct cxl_memdev_state *mds,
cxl_mem_opcode_to_name(mbox_cmd->opcode),
mbox_cmd->opcode, mbox_cmd->size_in);
- rc = mds->mbox_send(mds, mbox_cmd);
+ rc = cxl_mbox->mbox_send(cxl_mbox, mbox_cmd);
if (rc)
goto out;
@@ -659,11 +664,12 @@ int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s)
static int cxl_xfer_log(struct cxl_memdev_state *mds, uuid_t *uuid,
u32 *size, u8 *out)
{
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
u32 remaining = *size;
u32 offset = 0;
while (remaining) {
- u32 xfer_size = min_t(u32, remaining, mds->payload_size);
+ u32 xfer_size = min_t(u32, remaining, cxl_mbox->payload_size);
struct cxl_mbox_cmd mbox_cmd;
struct cxl_mbox_get_log log;
int rc;
@@ -682,7 +688,7 @@ static int cxl_xfer_log(struct cxl_memdev_state *mds, uuid_t *uuid,
.payload_out = out,
};
- rc = cxl_internal_send_cmd(mds, &mbox_cmd);
+ rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
/*
* The output payload length that indicates the number
@@ -752,22 +758,23 @@ static void cxl_walk_cel(struct cxl_memdev_state *mds, size_t size, u8 *cel)
static struct cxl_mbox_get_supported_logs *cxl_get_gsl(struct cxl_memdev_state *mds)
{
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
struct cxl_mbox_get_supported_logs *ret;
struct cxl_mbox_cmd mbox_cmd;
int rc;
- ret = kvmalloc(mds->payload_size, GFP_KERNEL);
+ ret = kvmalloc(cxl_mbox->payload_size, GFP_KERNEL);
if (!ret)
return ERR_PTR(-ENOMEM);
mbox_cmd = (struct cxl_mbox_cmd) {
.opcode = CXL_MBOX_OP_GET_SUPPORTED_LOGS,
- .size_out = mds->payload_size,
+ .size_out = cxl_mbox->payload_size,
.payload_out = ret,
/* At least the record number field must be valid */
.min_out = 2,
};
- rc = cxl_internal_send_cmd(mds, &mbox_cmd);
+ rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
if (rc < 0) {
kvfree(ret);
return ERR_PTR(rc);
@@ -910,6 +917,7 @@ static int cxl_clear_event_record(struct cxl_memdev_state *mds,
enum cxl_event_log_type log,
struct cxl_get_event_payload *get_pl)
{
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
struct cxl_mbox_clear_event_payload *payload;
u16 total = le16_to_cpu(get_pl->record_count);
u8 max_handles = CXL_CLEAR_EVENT_MAX_HANDLES;
@@ -920,8 +928,8 @@ static int cxl_clear_event_record(struct cxl_memdev_state *mds,
int i;
/* Payload size may limit the max handles */
- if (pl_size > mds->payload_size) {
- max_handles = (mds->payload_size - sizeof(*payload)) /
+ if (pl_size > cxl_mbox->payload_size) {
+ max_handles = (cxl_mbox->payload_size - sizeof(*payload)) /
sizeof(__le16);
pl_size = struct_size(payload, handles, max_handles);
}
@@ -955,7 +963,7 @@ static int cxl_clear_event_record(struct cxl_memdev_state *mds,
if (i == max_handles) {
payload->nr_recs = i;
- rc = cxl_internal_send_cmd(mds, &mbox_cmd);
+ rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
if (rc)
goto free_pl;
i = 0;
@@ -966,7 +974,7 @@ static int cxl_clear_event_record(struct cxl_memdev_state *mds,
if (i) {
payload->nr_recs = i;
mbox_cmd.size_in = struct_size(payload, handles, i);
- rc = cxl_internal_send_cmd(mds, &mbox_cmd);
+ rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
if (rc)
goto free_pl;
}
@@ -979,6 +987,7 @@ free_pl:
static void cxl_mem_get_records_log(struct cxl_memdev_state *mds,
enum cxl_event_log_type type)
{
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
struct cxl_memdev *cxlmd = mds->cxlds.cxlmd;
struct device *dev = mds->cxlds.dev;
struct cxl_get_event_payload *payload;
@@ -995,11 +1004,11 @@ static void cxl_mem_get_records_log(struct cxl_memdev_state *mds,
.payload_in = &log_type,
.size_in = sizeof(log_type),
.payload_out = payload,
- .size_out = mds->payload_size,
+ .size_out = cxl_mbox->payload_size,
.min_out = struct_size(payload, records, 0),
};
- rc = cxl_internal_send_cmd(mds, &mbox_cmd);
+ rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
if (rc) {
dev_err_ratelimited(dev,
"Event log '%d': Failed to query event records : %d",
@@ -1070,6 +1079,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_mem_get_event_records, CXL);
*/
static int cxl_mem_get_partition_info(struct cxl_memdev_state *mds)
{
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
struct cxl_mbox_get_partition_info pi;
struct cxl_mbox_cmd mbox_cmd;
int rc;
@@ -1079,7 +1089,7 @@ static int cxl_mem_get_partition_info(struct cxl_memdev_state *mds)
.size_out = sizeof(pi),
.payload_out = &pi,
};
- rc = cxl_internal_send_cmd(mds, &mbox_cmd);
+ rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
if (rc)
return rc;
@@ -1106,6 +1116,7 @@ static int cxl_mem_get_partition_info(struct cxl_memdev_state *mds)
*/
int cxl_dev_state_identify(struct cxl_memdev_state *mds)
{
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
/* See CXL 2.0 Table 175 Identify Memory Device Output Payload */
struct cxl_mbox_identify id;
struct cxl_mbox_cmd mbox_cmd;
@@ -1120,7 +1131,7 @@ int cxl_dev_state_identify(struct cxl_memdev_state *mds)
.size_out = sizeof(id),
.payload_out = &id,
};
- rc = cxl_internal_send_cmd(mds, &mbox_cmd);
+ rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
if (rc < 0)
return rc;
@@ -1148,6 +1159,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_dev_state_identify, CXL);
static int __cxl_mem_sanitize(struct cxl_memdev_state *mds, u16 cmd)
{
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
int rc;
u32 sec_out = 0;
struct cxl_get_security_output {
@@ -1159,14 +1171,13 @@ static int __cxl_mem_sanitize(struct cxl_memdev_state *mds, u16 cmd)
.size_out = sizeof(out),
};
struct cxl_mbox_cmd mbox_cmd = { .opcode = cmd };
- struct cxl_dev_state *cxlds = &mds->cxlds;
if (cmd != CXL_MBOX_OP_SANITIZE && cmd != CXL_MBOX_OP_SECURE_ERASE)
return -EINVAL;
- rc = cxl_internal_send_cmd(mds, &sec_cmd);
+ rc = cxl_internal_send_cmd(cxl_mbox, &sec_cmd);
if (rc < 0) {
- dev_err(cxlds->dev, "Failed to get security state : %d", rc);
+ dev_err(cxl_mbox->host, "Failed to get security state : %d", rc);
return rc;
}
@@ -1183,9 +1194,9 @@ static int __cxl_mem_sanitize(struct cxl_memdev_state *mds, u16 cmd)
sec_out & CXL_PMEM_SEC_STATE_LOCKED)
return -EINVAL;
- rc = cxl_internal_send_cmd(mds, &mbox_cmd);
+ rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
if (rc < 0) {
- dev_err(cxlds->dev, "Failed to sanitize device : %d", rc);
+ dev_err(cxl_mbox->host, "Failed to sanitize device : %d", rc);
return rc;
}
@@ -1214,7 +1225,7 @@ int cxl_mem_sanitize(struct cxl_memdev *cxlmd, u16 cmd)
int rc;
/* synchronize with cxl_mem_probe() and decoder write operations */
- device_lock(&cxlmd->dev);
+ guard(device)(&cxlmd->dev);
endpoint = cxlmd->endpoint;
down_read(&cxl_region_rwsem);
/*
@@ -1226,7 +1237,6 @@ int cxl_mem_sanitize(struct cxl_memdev *cxlmd, u16 cmd)
else
rc = -EBUSY;
up_read(&cxl_region_rwsem);
- device_unlock(&cxlmd->dev);
return rc;
}
@@ -1300,6 +1310,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_mem_create_range_info, CXL);
int cxl_set_timestamp(struct cxl_memdev_state *mds)
{
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
struct cxl_mbox_cmd mbox_cmd;
struct cxl_mbox_set_timestamp_in pi;
int rc;
@@ -1311,7 +1322,7 @@ int cxl_set_timestamp(struct cxl_memdev_state *mds)
.payload_in = &pi,
};
- rc = cxl_internal_send_cmd(mds, &mbox_cmd);
+ rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
/*
* Command is optional. Devices may have another way of providing
* a timestamp, or may return all 0s in timestamp fields.
@@ -1328,6 +1339,7 @@ int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len,
struct cxl_region *cxlr)
{
struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
+ struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox;
struct cxl_mbox_poison_out *po;
struct cxl_mbox_poison_in pi;
int nr_records = 0;
@@ -1346,12 +1358,12 @@ int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len,
.opcode = CXL_MBOX_OP_GET_POISON,
.size_in = sizeof(pi),
.payload_in = &pi,
- .size_out = mds->payload_size,
+ .size_out = cxl_mbox->payload_size,
.payload_out = po,
.min_out = struct_size(po, record, 0),
};
- rc = cxl_internal_send_cmd(mds, &mbox_cmd);
+ rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
if (rc)
break;
@@ -1382,7 +1394,9 @@ static void free_poison_buf(void *buf)
/* Get Poison List output buffer is protected by mds->poison.lock */
static int cxl_poison_alloc_buf(struct cxl_memdev_state *mds)
{
- mds->poison.list_out = kvmalloc(mds->payload_size, GFP_KERNEL);
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
+
+ mds->poison.list_out = kvmalloc(cxl_mbox->payload_size, GFP_KERNEL);
if (!mds->poison.list_out)
return -ENOMEM;
@@ -1408,6 +1422,19 @@ int cxl_poison_state_init(struct cxl_memdev_state *mds)
}
EXPORT_SYMBOL_NS_GPL(cxl_poison_state_init, CXL);
+int cxl_mailbox_init(struct cxl_mailbox *cxl_mbox, struct device *host)
+{
+ if (!cxl_mbox || !host)
+ return -EINVAL;
+
+ cxl_mbox->host = host;
+ mutex_init(&cxl_mbox->mbox_mutex);
+ rcuwait_init(&cxl_mbox->mbox_wait);
+
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_mailbox_init, CXL);
+
struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev)
{
struct cxl_memdev_state *mds;
@@ -1418,7 +1445,6 @@ struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev)
return ERR_PTR(-ENOMEM);
}
- mutex_init(&mds->mbox_mutex);
mutex_init(&mds->event.log_lock);
mds->cxlds.dev = dev;
mds->cxlds.reg_map.host = dev;
diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index 0277726afd04..84fefb76dafa 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -58,7 +58,7 @@ static ssize_t payload_max_show(struct device *dev,
if (!mds)
return sysfs_emit(buf, "\n");
- return sysfs_emit(buf, "%zu\n", mds->payload_size);
+ return sysfs_emit(buf, "%zu\n", cxlds->cxl_mbox.payload_size);
}
static DEVICE_ATTR_RO(payload_max);
@@ -124,15 +124,16 @@ static ssize_t security_state_show(struct device *dev,
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ struct cxl_mailbox *cxl_mbox = &cxlds->cxl_mbox;
struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
unsigned long state = mds->security.state;
int rc = 0;
/* sync with latest submission state */
- mutex_lock(&mds->mbox_mutex);
+ mutex_lock(&cxl_mbox->mbox_mutex);
if (mds->security.sanitize_active)
rc = sysfs_emit(buf, "sanitize\n");
- mutex_unlock(&mds->mbox_mutex);
+ mutex_unlock(&cxl_mbox->mbox_mutex);
if (rc)
return rc;
@@ -277,7 +278,7 @@ static int cxl_validate_poison_dpa(struct cxl_memdev *cxlmd, u64 dpa)
int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa)
{
- struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
+ struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox;
struct cxl_mbox_inject_poison inject;
struct cxl_poison_record record;
struct cxl_mbox_cmd mbox_cmd;
@@ -307,13 +308,13 @@ int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa)
.size_in = sizeof(inject),
.payload_in = &inject,
};
- rc = cxl_internal_send_cmd(mds, &mbox_cmd);
+ rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
if (rc)
goto out;
cxlr = cxl_dpa_to_region(cxlmd, dpa);
if (cxlr)
- dev_warn_once(mds->cxlds.dev,
+ dev_warn_once(cxl_mbox->host,
"poison inject dpa:%#llx region: %s\n", dpa,
dev_name(&cxlr->dev));
@@ -332,7 +333,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_inject_poison, CXL);
int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa)
{
- struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
+ struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox;
struct cxl_mbox_clear_poison clear;
struct cxl_poison_record record;
struct cxl_mbox_cmd mbox_cmd;
@@ -371,13 +372,13 @@ int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa)
.payload_in = &clear,
};
- rc = cxl_internal_send_cmd(mds, &mbox_cmd);
+ rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
if (rc)
goto out;
cxlr = cxl_dpa_to_region(cxlmd, dpa);
if (cxlr)
- dev_warn_once(mds->cxlds.dev,
+ dev_warn_once(cxl_mbox->host,
"poison clear dpa:%#llx region: %s\n", dpa,
dev_name(&cxlr->dev));
@@ -714,6 +715,7 @@ static int cxl_memdev_release_file(struct inode *inode, struct file *file)
*/
static int cxl_mem_get_fw_info(struct cxl_memdev_state *mds)
{
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
struct cxl_mbox_get_fw_info info;
struct cxl_mbox_cmd mbox_cmd;
int rc;
@@ -724,7 +726,7 @@ static int cxl_mem_get_fw_info(struct cxl_memdev_state *mds)
.payload_out = &info,
};
- rc = cxl_internal_send_cmd(mds, &mbox_cmd);
+ rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
if (rc < 0)
return rc;
@@ -748,6 +750,7 @@ static int cxl_mem_get_fw_info(struct cxl_memdev_state *mds)
*/
static int cxl_mem_activate_fw(struct cxl_memdev_state *mds, int slot)
{
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
struct cxl_mbox_activate_fw activate;
struct cxl_mbox_cmd mbox_cmd;
@@ -764,7 +767,7 @@ static int cxl_mem_activate_fw(struct cxl_memdev_state *mds, int slot)
activate.action = CXL_FW_ACTIVATE_OFFLINE;
activate.slot = slot;
- return cxl_internal_send_cmd(mds, &mbox_cmd);
+ return cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
}
/**
@@ -779,6 +782,7 @@ static int cxl_mem_activate_fw(struct cxl_memdev_state *mds, int slot)
*/
static int cxl_mem_abort_fw_xfer(struct cxl_memdev_state *mds)
{
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
struct cxl_mbox_transfer_fw *transfer;
struct cxl_mbox_cmd mbox_cmd;
int rc;
@@ -798,7 +802,7 @@ static int cxl_mem_abort_fw_xfer(struct cxl_memdev_state *mds)
transfer->action = CXL_FW_TRANSFER_ACTION_ABORT;
- rc = cxl_internal_send_cmd(mds, &mbox_cmd);
+ rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
kfree(transfer);
return rc;
}
@@ -829,12 +833,13 @@ static enum fw_upload_err cxl_fw_prepare(struct fw_upload *fwl, const u8 *data,
{
struct cxl_memdev_state *mds = fwl->dd_handle;
struct cxl_mbox_transfer_fw *transfer;
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
if (!size)
return FW_UPLOAD_ERR_INVALID_SIZE;
mds->fw.oneshot = struct_size(transfer, data, size) <
- mds->payload_size;
+ cxl_mbox->payload_size;
if (cxl_mem_get_fw_info(mds))
return FW_UPLOAD_ERR_HW_ERROR;
@@ -854,6 +859,7 @@ static enum fw_upload_err cxl_fw_write(struct fw_upload *fwl, const u8 *data,
{
struct cxl_memdev_state *mds = fwl->dd_handle;
struct cxl_dev_state *cxlds = &mds->cxlds;
+ struct cxl_mailbox *cxl_mbox = &cxlds->cxl_mbox;
struct cxl_memdev *cxlmd = cxlds->cxlmd;
struct cxl_mbox_transfer_fw *transfer;
struct cxl_mbox_cmd mbox_cmd;
@@ -877,7 +883,7 @@ static enum fw_upload_err cxl_fw_write(struct fw_upload *fwl, const u8 *data,
* sizeof(*transfer) is 128. These constraints imply that @cur_size
* will always be 128b aligned.
*/
- cur_size = min_t(size_t, size, mds->payload_size - sizeof(*transfer));
+ cur_size = min_t(size_t, size, cxl_mbox->payload_size - sizeof(*transfer));
remaining = size - cur_size;
size_in = struct_size(transfer, data, cur_size);
@@ -921,7 +927,7 @@ static enum fw_upload_err cxl_fw_write(struct fw_upload *fwl, const u8 *data,
.poll_count = 30,
};
- rc = cxl_internal_send_cmd(mds, &mbox_cmd);
+ rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
if (rc < 0) {
rc = FW_UPLOAD_ERR_RW_ERROR;
goto out_free;
@@ -1059,16 +1065,17 @@ EXPORT_SYMBOL_NS_GPL(devm_cxl_add_memdev, CXL);
static void sanitize_teardown_notifier(void *data)
{
struct cxl_memdev_state *mds = data;
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
struct kernfs_node *state;
/*
* Prevent new irq triggered invocations of the workqueue and
* flush inflight invocations.
*/
- mutex_lock(&mds->mbox_mutex);
+ mutex_lock(&cxl_mbox->mbox_mutex);
state = mds->security.sanitize_node;
mds->security.sanitize_node = NULL;
- mutex_unlock(&mds->mbox_mutex);
+ mutex_unlock(&cxl_mbox->mbox_mutex);
cancel_delayed_work_sync(&mds->security.poll_dwork);
sysfs_put(state);
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 51132a575b27..5b46bc46aaa9 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -211,37 +211,6 @@ int cxl_await_media_ready(struct cxl_dev_state *cxlds)
}
EXPORT_SYMBOL_NS_GPL(cxl_await_media_ready, CXL);
-static int wait_for_valid(struct pci_dev *pdev, int d)
-{
- u32 val;
- int rc;
-
- /*
- * Memory_Info_Valid: When set, indicates that the CXL Range 1 Size high
- * and Size Low registers are valid. Must be set within 1 second of
- * deassertion of reset to CXL device. Likely it is already set by the
- * time this runs, but otherwise give a 1.5 second timeout in case of
- * clock skew.
- */
- rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val);
- if (rc)
- return rc;
-
- if (val & CXL_DVSEC_MEM_INFO_VALID)
- return 0;
-
- msleep(1500);
-
- rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val);
- if (rc)
- return rc;
-
- if (val & CXL_DVSEC_MEM_INFO_VALID)
- return 0;
-
- return -ETIMEDOUT;
-}
-
static int cxl_set_mem_enable(struct cxl_dev_state *cxlds, u16 val)
{
struct pci_dev *pdev = to_pci_dev(cxlds->dev);
@@ -322,11 +291,13 @@ static int devm_cxl_enable_hdm(struct device *host, struct cxl_hdm *cxlhdm)
return devm_add_action_or_reset(host, disable_hdm, cxlhdm);
}
-int cxl_dvsec_rr_decode(struct device *dev, int d,
+int cxl_dvsec_rr_decode(struct device *dev, struct cxl_port *port,
struct cxl_endpoint_dvsec_info *info)
{
struct pci_dev *pdev = to_pci_dev(dev);
+ struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
int hdm_count, rc, i, ranges = 0;
+ int d = cxlds->cxl_dvsec;
u16 cap, ctrl;
if (!d) {
@@ -353,12 +324,6 @@ int cxl_dvsec_rr_decode(struct device *dev, int d,
if (!hdm_count || hdm_count > 2)
return -EINVAL;
- rc = wait_for_valid(pdev, d);
- if (rc) {
- dev_dbg(dev, "Failure awaiting MEM_INFO_VALID (%d)\n", rc);
- return rc;
- }
-
/*
* The current DVSEC values are moot if the memory capability is
* disabled, and they will remain moot after the HDM Decoder
@@ -376,6 +341,10 @@ int cxl_dvsec_rr_decode(struct device *dev, int d,
u64 base, size;
u32 temp;
+ rc = cxl_dvsec_mem_range_valid(cxlds, i);
+ if (rc)
+ return rc;
+
rc = pci_read_config_dword(
pdev, d + CXL_DVSEC_RANGE_SIZE_HIGH(i), &temp);
if (rc)
@@ -390,10 +359,6 @@ int cxl_dvsec_rr_decode(struct device *dev, int d,
size |= temp & CXL_DVSEC_MEM_SIZE_LOW_MASK;
if (!size) {
- info->dvsec_range[i] = (struct range) {
- .start = 0,
- .end = CXL_RESOURCE_NONE,
- };
continue;
}
@@ -411,12 +376,10 @@ int cxl_dvsec_rr_decode(struct device *dev, int d,
base |= temp & CXL_DVSEC_MEM_BASE_LOW_MASK;
- info->dvsec_range[i] = (struct range) {
+ info->dvsec_range[ranges++] = (struct range) {
.start = base,
.end = base + size - 1
};
-
- ranges++;
}
info->ranges = ranges;
@@ -463,7 +426,15 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm,
return -ENODEV;
}
- for (i = 0, allowed = 0; info->mem_enabled && i < info->ranges; i++) {
+ if (!info->mem_enabled) {
+ rc = devm_cxl_enable_hdm(&port->dev, cxlhdm);
+ if (rc)
+ return rc;
+
+ return devm_cxl_enable_mem(&port->dev, cxlds);
+ }
+
+ for (i = 0, allowed = 0; i < info->ranges; i++) {
struct device *cxld_dev;
cxld_dev = device_find_child(&root->dev, &info->dvsec_range[i],
@@ -477,7 +448,7 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm,
allowed++;
}
- if (!allowed && info->mem_enabled) {
+ if (!allowed) {
dev_err(dev, "Range register decodes outside platform defined CXL ranges.\n");
return -ENXIO;
}
@@ -491,14 +462,7 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm,
* match. If at least one DVSEC range is enabled and allowed, skip HDM
* Decoder Capability Enable.
*/
- if (info->mem_enabled)
- return 0;
-
- rc = devm_cxl_enable_hdm(&port->dev, cxlhdm);
- if (rc)
- return rc;
-
- return devm_cxl_enable_mem(&port->dev, cxlds);
+ return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_hdm_decode_init, CXL);
@@ -772,22 +736,20 @@ static bool cxl_handle_endpoint_ras(struct cxl_dev_state *cxlds)
static void cxl_dport_map_rch_aer(struct cxl_dport *dport)
{
- struct cxl_rcrb_info *ri = &dport->rcrb;
- void __iomem *dport_aer = NULL;
resource_size_t aer_phys;
struct device *host;
+ u16 aer_cap;
- if (dport->rch && ri->aer_cap) {
+ aer_cap = cxl_rcrb_to_aer(dport->dport_dev, dport->rcrb.base);
+ if (aer_cap) {
host = dport->reg_map.host;
- aer_phys = ri->aer_cap + ri->base;
- dport_aer = devm_cxl_iomap_block(host, aer_phys,
- sizeof(struct aer_capability_regs));
+ aer_phys = aer_cap + dport->rcrb.base;
+ dport->regs.dport_aer = devm_cxl_iomap_block(host, aer_phys,
+ sizeof(struct aer_capability_regs));
}
-
- dport->regs.dport_aer = dport_aer;
}
-static void cxl_dport_map_regs(struct cxl_dport *dport)
+static void cxl_dport_map_ras(struct cxl_dport *dport)
{
struct cxl_register_map *map = &dport->reg_map;
struct device *dev = dport->dport_dev;
@@ -797,22 +759,16 @@ static void cxl_dport_map_regs(struct cxl_dport *dport)
else if (cxl_map_component_regs(map, &dport->regs.component,
BIT(CXL_CM_CAP_CAP_ID_RAS)))
dev_dbg(dev, "Failed to map RAS capability.\n");
-
- if (dport->rch)
- cxl_dport_map_rch_aer(dport);
}
static void cxl_disable_rch_root_ints(struct cxl_dport *dport)
{
void __iomem *aer_base = dport->regs.dport_aer;
- struct pci_host_bridge *bridge;
u32 aer_cmd_mask, aer_cmd;
if (!aer_base)
return;
- bridge = to_pci_host_bridge(dport->dport_dev);
-
/*
* Disable RCH root port command interrupts.
* CXL 3.0 12.2.1.1 - RCH Downstream Port-detected Errors
@@ -821,34 +777,35 @@ static void cxl_disable_rch_root_ints(struct cxl_dport *dport)
* the root cmd register's interrupts is required. But, PCI spec
* shows these are disabled by default on reset.
*/
- if (bridge->native_aer) {
- aer_cmd_mask = (PCI_ERR_ROOT_CMD_COR_EN |
- PCI_ERR_ROOT_CMD_NONFATAL_EN |
- PCI_ERR_ROOT_CMD_FATAL_EN);
- aer_cmd = readl(aer_base + PCI_ERR_ROOT_COMMAND);
- aer_cmd &= ~aer_cmd_mask;
- writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND);
- }
+ aer_cmd_mask = (PCI_ERR_ROOT_CMD_COR_EN |
+ PCI_ERR_ROOT_CMD_NONFATAL_EN |
+ PCI_ERR_ROOT_CMD_FATAL_EN);
+ aer_cmd = readl(aer_base + PCI_ERR_ROOT_COMMAND);
+ aer_cmd &= ~aer_cmd_mask;
+ writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND);
}
-void cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport)
+/**
+ * cxl_dport_init_ras_reporting - Setup CXL RAS report on this dport
+ * @dport: the cxl_dport that needs to be initialized
+ * @host: host device for devm operations
+ */
+void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host)
{
- struct device *dport_dev = dport->dport_dev;
+ dport->reg_map.host = host;
+ cxl_dport_map_ras(dport);
if (dport->rch) {
- struct pci_host_bridge *host_bridge = to_pci_host_bridge(dport_dev);
-
- if (host_bridge->native_aer)
- dport->rcrb.aer_cap = cxl_rcrb_to_aer(dport_dev, dport->rcrb.base);
- }
+ struct pci_host_bridge *host_bridge = to_pci_host_bridge(dport->dport_dev);
- dport->reg_map.host = host;
- cxl_dport_map_regs(dport);
+ if (!host_bridge->native_aer)
+ return;
- if (dport->rch)
+ cxl_dport_map_rch_aer(dport);
cxl_disable_rch_root_ints(dport);
+ }
}
-EXPORT_SYMBOL_NS_GPL(cxl_setup_parent_dport, CXL);
+EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, CXL);
static void cxl_handle_rdport_cor_ras(struct cxl_dev_state *cxlds,
struct cxl_dport *dport)
@@ -915,15 +872,13 @@ static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds)
struct pci_dev *pdev = to_pci_dev(cxlds->dev);
struct aer_capability_regs aer_regs;
struct cxl_dport *dport;
- struct cxl_port *port;
int severity;
- port = cxl_pci_find_port(pdev, &dport);
+ struct cxl_port *port __free(put_cxl_port) =
+ cxl_pci_find_port(pdev, &dport);
if (!port)
return;
- put_device(&port->dev);
-
if (!cxl_rch_get_aer_info(dport->regs.dport_aer, &aer_regs))
return;
@@ -1076,3 +1031,26 @@ bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port)
__cxl_endpoint_decoder_reset_detected);
}
EXPORT_SYMBOL_NS_GPL(cxl_endpoint_decoder_reset_detected, CXL);
+
+int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c)
+{
+ int speed, bw;
+ u16 lnksta;
+ u32 width;
+
+ speed = pcie_link_speed_mbps(pdev);
+ if (speed < 0)
+ return speed;
+ speed /= BITS_PER_BYTE;
+
+ pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnksta);
+ width = FIELD_GET(PCI_EXP_LNKSTA_NLW, lnksta);
+ bw = speed * width;
+
+ for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
+ c[i].read_bandwidth = bw;
+ c[i].write_bandwidth = bw;
+ }
+
+ return 0;
+}
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index 1d5007e3795a..e666ec6a9085 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -3,7 +3,6 @@
#include <linux/platform_device.h>
#include <linux/memregion.h>
#include <linux/workqueue.h>
-#include <linux/einj-cxl.h>
#include <linux/debugfs.h>
#include <linux/device.h>
#include <linux/module.h>
@@ -11,6 +10,7 @@
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/node.h>
+#include <cxl/einj.h>
#include <cxlmem.h>
#include <cxlpci.h>
#include <cxl.h>
@@ -828,27 +828,20 @@ static void cxl_debugfs_create_dport_dir(struct cxl_dport *dport)
&cxl_einj_inject_fops);
}
-static struct cxl_port *__devm_cxl_add_port(struct device *host,
- struct device *uport_dev,
- resource_size_t component_reg_phys,
- struct cxl_dport *parent_dport)
+static int cxl_port_add(struct cxl_port *port,
+ resource_size_t component_reg_phys,
+ struct cxl_dport *parent_dport)
{
- struct cxl_port *port;
- struct device *dev;
+ struct device *dev __free(put_device) = &port->dev;
int rc;
- port = cxl_port_alloc(uport_dev, parent_dport);
- if (IS_ERR(port))
- return port;
-
- dev = &port->dev;
- if (is_cxl_memdev(uport_dev)) {
- struct cxl_memdev *cxlmd = to_cxl_memdev(uport_dev);
+ if (is_cxl_memdev(port->uport_dev)) {
+ struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
rc = dev_set_name(dev, "endpoint%d", port->id);
if (rc)
- goto err;
+ return rc;
/*
* The endpoint driver already enumerated the component and RAS
@@ -861,19 +854,41 @@ static struct cxl_port *__devm_cxl_add_port(struct device *host,
} else if (parent_dport) {
rc = dev_set_name(dev, "port%d", port->id);
if (rc)
- goto err;
+ return rc;
rc = cxl_port_setup_regs(port, component_reg_phys);
if (rc)
- goto err;
- } else
+ return rc;
+ } else {
rc = dev_set_name(dev, "root%d", port->id);
- if (rc)
- goto err;
+ if (rc)
+ return rc;
+ }
rc = device_add(dev);
if (rc)
- goto err;
+ return rc;
+
+ /* Inhibit the cleanup function invoked */
+ dev = NULL;
+ return 0;
+}
+
+static struct cxl_port *__devm_cxl_add_port(struct device *host,
+ struct device *uport_dev,
+ resource_size_t component_reg_phys,
+ struct cxl_dport *parent_dport)
+{
+ struct cxl_port *port;
+ int rc;
+
+ port = cxl_port_alloc(uport_dev, parent_dport);
+ if (IS_ERR(port))
+ return port;
+
+ rc = cxl_port_add(port, component_reg_phys, parent_dport);
+ if (rc)
+ return ERR_PTR(rc);
rc = devm_add_action_or_reset(host, unregister_port, port);
if (rc)
@@ -891,10 +906,6 @@ static struct cxl_port *__devm_cxl_add_port(struct device *host,
port->pci_latency = cxl_pci_get_latency(to_pci_dev(uport_dev));
return port;
-
-err:
- put_device(dev);
- return ERR_PTR(rc);
}
/**
@@ -941,7 +952,7 @@ struct cxl_root *devm_cxl_add_root(struct device *host,
port = devm_cxl_add_port(host, host, CXL_RESOURCE_NONE, NULL);
if (IS_ERR(port))
- return (struct cxl_root *)port;
+ return ERR_CAST(port);
cxl_root = to_cxl_root(port);
cxl_root->ops = ops;
@@ -1258,18 +1269,13 @@ EXPORT_SYMBOL_NS_GPL(devm_cxl_add_rch_dport, CXL);
static int add_ep(struct cxl_ep *new)
{
struct cxl_port *port = new->dport->port;
- int rc;
- device_lock(&port->dev);
- if (port->dead) {
- device_unlock(&port->dev);
+ guard(device)(&port->dev);
+ if (port->dead)
return -ENXIO;
- }
- rc = xa_insert(&port->endpoints, (unsigned long)new->ep, new,
- GFP_KERNEL);
- device_unlock(&port->dev);
- return rc;
+ return xa_insert(&port->endpoints, (unsigned long)new->ep,
+ new, GFP_KERNEL);
}
/**
@@ -1393,14 +1399,14 @@ static void delete_endpoint(void *data)
struct cxl_port *endpoint = cxlmd->endpoint;
struct device *host = endpoint_host(endpoint);
- device_lock(host);
- if (host->driver && !endpoint->dead) {
- devm_release_action(host, cxl_unlink_parent_dport, endpoint);
- devm_release_action(host, cxl_unlink_uport, endpoint);
- devm_release_action(host, unregister_port, endpoint);
+ scoped_guard(device, host) {
+ if (host->driver && !endpoint->dead) {
+ devm_release_action(host, cxl_unlink_parent_dport, endpoint);
+ devm_release_action(host, cxl_unlink_uport, endpoint);
+ devm_release_action(host, unregister_port, endpoint);
+ }
+ cxlmd->endpoint = NULL;
}
- cxlmd->endpoint = NULL;
- device_unlock(host);
put_device(&endpoint->dev);
put_device(host);
}
@@ -1477,12 +1483,11 @@ static void cxl_detach_ep(void *data)
.cxlmd = cxlmd,
.depth = i,
};
- struct device *dev;
struct cxl_ep *ep;
bool died = false;
- dev = bus_find_device(&cxl_bus_type, NULL, &ctx,
- port_has_memdev);
+ struct device *dev __free(put_device) =
+ bus_find_device(&cxl_bus_type, NULL, &ctx, port_has_memdev);
if (!dev)
continue;
port = to_cxl_port(dev);
@@ -1512,7 +1517,6 @@ static void cxl_detach_ep(void *data)
dev_name(&port->dev));
delete_switch_port(port);
}
- put_device(&port->dev);
device_unlock(&parent_port->dev);
}
}
@@ -1540,7 +1544,6 @@ static int add_port_attach_ep(struct cxl_memdev *cxlmd,
struct device *dport_dev)
{
struct device *dparent = grandparent(dport_dev);
- struct cxl_port *port, *parent_port = NULL;
struct cxl_dport *dport, *parent_dport;
resource_size_t component_reg_phys;
int rc;
@@ -1556,50 +1559,52 @@ static int add_port_attach_ep(struct cxl_memdev *cxlmd,
return -ENXIO;
}
- parent_port = find_cxl_port(dparent, &parent_dport);
+ struct cxl_port *parent_port __free(put_cxl_port) =
+ find_cxl_port(dparent, &parent_dport);
if (!parent_port) {
/* iterate to create this parent_port */
return -EAGAIN;
}
- device_lock(&parent_port->dev);
- if (!parent_port->dev.driver) {
- dev_warn(&cxlmd->dev,
- "port %s:%s disabled, failed to enumerate CXL.mem\n",
- dev_name(&parent_port->dev), dev_name(uport_dev));
- port = ERR_PTR(-ENXIO);
- goto out;
- }
+ /*
+ * Definition with __free() here to keep the sequence of
+ * dereferencing the device of the port before the parent_port releasing.
+ */
+ struct cxl_port *port __free(put_cxl_port) = NULL;
+ scoped_guard(device, &parent_port->dev) {
+ if (!parent_port->dev.driver) {
+ dev_warn(&cxlmd->dev,
+ "port %s:%s disabled, failed to enumerate CXL.mem\n",
+ dev_name(&parent_port->dev), dev_name(uport_dev));
+ return -ENXIO;
+ }
- port = find_cxl_port_at(parent_port, dport_dev, &dport);
- if (!port) {
- component_reg_phys = find_component_registers(uport_dev);
- port = devm_cxl_add_port(&parent_port->dev, uport_dev,
- component_reg_phys, parent_dport);
- /* retry find to pick up the new dport information */
- if (!IS_ERR(port))
+ port = find_cxl_port_at(parent_port, dport_dev, &dport);
+ if (!port) {
+ component_reg_phys = find_component_registers(uport_dev);
+ port = devm_cxl_add_port(&parent_port->dev, uport_dev,
+ component_reg_phys, parent_dport);
+ if (IS_ERR(port))
+ return PTR_ERR(port);
+
+ /* retry find to pick up the new dport information */
port = find_cxl_port_at(parent_port, dport_dev, &dport);
+ if (!port)
+ return -ENXIO;
+ }
}
-out:
- device_unlock(&parent_port->dev);
- if (IS_ERR(port))
- rc = PTR_ERR(port);
- else {
- dev_dbg(&cxlmd->dev, "add to new port %s:%s\n",
- dev_name(&port->dev), dev_name(port->uport_dev));
- rc = cxl_add_ep(dport, &cxlmd->dev);
- if (rc == -EBUSY) {
- /*
- * "can't" happen, but this error code means
- * something to the caller, so translate it.
- */
- rc = -ENXIO;
- }
- put_device(&port->dev);
+ dev_dbg(&cxlmd->dev, "add to new port %s:%s\n",
+ dev_name(&port->dev), dev_name(port->uport_dev));
+ rc = cxl_add_ep(dport, &cxlmd->dev);
+ if (rc == -EBUSY) {
+ /*
+ * "can't" happen, but this error code means
+ * something to the caller, so translate it.
+ */
+ rc = -ENXIO;
}
- put_device(&parent_port->dev);
return rc;
}
@@ -1630,7 +1635,6 @@ retry:
struct device *dport_dev = grandparent(iter);
struct device *uport_dev;
struct cxl_dport *dport;
- struct cxl_port *port;
/*
* The terminal "grandparent" in PCI is NULL and @platform_bus
@@ -1649,7 +1653,8 @@ retry:
dev_dbg(dev, "scan: iter: %s dport_dev: %s parent: %s\n",
dev_name(iter), dev_name(dport_dev),
dev_name(uport_dev));
- port = find_cxl_port(dport_dev, &dport);
+ struct cxl_port *port __free(put_cxl_port) =
+ find_cxl_port(dport_dev, &dport);
if (port) {
dev_dbg(&cxlmd->dev,
"found already registered port %s:%s\n",
@@ -1664,18 +1669,13 @@ retry:
* the parent_port lock as the current port may be being
* reaped.
*/
- if (rc && rc != -EBUSY) {
- put_device(&port->dev);
+ if (rc && rc != -EBUSY)
return rc;
- }
/* Any more ports to add between this one and the root? */
- if (!dev_is_cxl_root_child(&port->dev)) {
- put_device(&port->dev);
+ if (!dev_is_cxl_root_child(&port->dev))
continue;
- }
- put_device(&port->dev);
return 0;
}
@@ -1983,7 +1983,6 @@ EXPORT_SYMBOL_NS_GPL(cxl_decoder_add_locked, CXL);
int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map)
{
struct cxl_port *port;
- int rc;
if (WARN_ON_ONCE(!cxld))
return -EINVAL;
@@ -1993,11 +1992,8 @@ int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map)
port = to_cxl_port(cxld->dev.parent);
- device_lock(&port->dev);
- rc = cxl_decoder_add_locked(cxld, target_map);
- device_unlock(&port->dev);
-
- return rc;
+ guard(device)(&port->dev);
+ return cxl_decoder_add_locked(cxld, target_map);
}
EXPORT_SYMBOL_NS_GPL(cxl_decoder_add, CXL);
@@ -2241,6 +2237,26 @@ int cxl_endpoint_get_perf_coordinates(struct cxl_port *port,
}
EXPORT_SYMBOL_NS_GPL(cxl_endpoint_get_perf_coordinates, CXL);
+int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port,
+ struct access_coordinate *c)
+{
+ struct cxl_dport *dport = port->parent_dport;
+
+ /* Check this port is connected to a switch DSP and not an RP */
+ if (parent_port_is_cxl_root(to_cxl_port(port->dev.parent)))
+ return -ENODEV;
+
+ if (!coordinates_valid(dport->coord))
+ return -EINVAL;
+
+ for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
+ c[i].read_bandwidth = dport->coord[i].read_bandwidth;
+ c[i].write_bandwidth = dport->coord[i].write_bandwidth;
+ }
+
+ return 0;
+}
+
/* for user tooling to ensure port disable work has completed */
static ssize_t flush_store(const struct bus_type *bus, const char *buf, size_t count)
{
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 21ad5f242875..e701e4b04032 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -1983,6 +1983,7 @@ static int cxl_region_attach(struct cxl_region *cxlr,
* then the region is already committed.
*/
p->state = CXL_CONFIG_COMMIT;
+ cxl_region_shared_upstream_bandwidth_update(cxlr);
return 0;
}
@@ -2004,6 +2005,7 @@ static int cxl_region_attach(struct cxl_region *cxlr,
if (rc)
return rc;
p->state = CXL_CONFIG_ACTIVE;
+ cxl_region_shared_upstream_bandwidth_update(cxlr);
}
cxled->cxld.interleave_ways = p->interleave_ways;
@@ -2313,8 +2315,6 @@ static void unregister_region(void *_cxlr)
struct cxl_region_params *p = &cxlr->params;
int i;
- unregister_memory_notifier(&cxlr->memory_notifier);
- unregister_mt_adistance_algorithm(&cxlr->adist_notifier);
device_del(&cxlr->dev);
/*
@@ -2391,18 +2391,6 @@ static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid)
return true;
}
-static int cxl_region_nid(struct cxl_region *cxlr)
-{
- struct cxl_region_params *p = &cxlr->params;
- struct resource *res;
-
- guard(rwsem_read)(&cxl_region_rwsem);
- res = p->res;
- if (!res)
- return NUMA_NO_NODE;
- return phys_to_target_node(res->start);
-}
-
static int cxl_region_perf_attrs_callback(struct notifier_block *nb,
unsigned long action, void *arg)
{
@@ -2415,7 +2403,11 @@ static int cxl_region_perf_attrs_callback(struct notifier_block *nb,
if (nid == NUMA_NO_NODE || action != MEM_ONLINE)
return NOTIFY_DONE;
- region_nid = cxl_region_nid(cxlr);
+ /*
+ * No need to hold cxl_region_rwsem; region parameters are stable
+ * within the cxl_region driver.
+ */
+ region_nid = phys_to_target_node(cxlr->params.res->start);
if (nid != region_nid)
return NOTIFY_DONE;
@@ -2434,7 +2426,11 @@ static int cxl_region_calculate_adistance(struct notifier_block *nb,
int *adist = data;
int region_nid;
- region_nid = cxl_region_nid(cxlr);
+ /*
+ * No need to hold cxl_region_rwsem; region parameters are stable
+ * within the cxl_region driver.
+ */
+ region_nid = phys_to_target_node(cxlr->params.res->start);
if (nid != region_nid)
return NOTIFY_OK;
@@ -2484,14 +2480,6 @@ static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd,
if (rc)
goto err;
- cxlr->memory_notifier.notifier_call = cxl_region_perf_attrs_callback;
- cxlr->memory_notifier.priority = CXL_CALLBACK_PRI;
- register_memory_notifier(&cxlr->memory_notifier);
-
- cxlr->adist_notifier.notifier_call = cxl_region_calculate_adistance;
- cxlr->adist_notifier.priority = 100;
- register_mt_adistance_algorithm(&cxlr->adist_notifier);
-
rc = devm_add_action_or_reset(port->uport_dev, unregister_region, cxlr);
if (rc)
return ERR_PTR(rc);
@@ -3094,11 +3082,11 @@ static void cxlr_release_nvdimm(void *_cxlr)
struct cxl_region *cxlr = _cxlr;
struct cxl_nvdimm_bridge *cxl_nvb = cxlr->cxl_nvb;
- device_lock(&cxl_nvb->dev);
- if (cxlr->cxlr_pmem)
- devm_release_action(&cxl_nvb->dev, cxlr_pmem_unregister,
- cxlr->cxlr_pmem);
- device_unlock(&cxl_nvb->dev);
+ scoped_guard(device, &cxl_nvb->dev) {
+ if (cxlr->cxlr_pmem)
+ devm_release_action(&cxl_nvb->dev, cxlr_pmem_unregister,
+ cxlr->cxlr_pmem);
+ }
cxlr->cxl_nvb = NULL;
put_device(&cxl_nvb->dev);
}
@@ -3134,13 +3122,14 @@ static int devm_cxl_add_pmem_region(struct cxl_region *cxlr)
dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent),
dev_name(dev));
- device_lock(&cxl_nvb->dev);
- if (cxl_nvb->dev.driver)
- rc = devm_add_action_or_reset(&cxl_nvb->dev,
- cxlr_pmem_unregister, cxlr_pmem);
- else
- rc = -ENXIO;
- device_unlock(&cxl_nvb->dev);
+ scoped_guard(device, &cxl_nvb->dev) {
+ if (cxl_nvb->dev.driver)
+ rc = devm_add_action_or_reset(&cxl_nvb->dev,
+ cxlr_pmem_unregister,
+ cxlr_pmem);
+ else
+ rc = -ENXIO;
+ }
if (rc)
goto err_bridge;
@@ -3386,6 +3375,14 @@ static int is_system_ram(struct resource *res, void *arg)
return 1;
}
+static void shutdown_notifiers(void *_cxlr)
+{
+ struct cxl_region *cxlr = _cxlr;
+
+ unregister_memory_notifier(&cxlr->memory_notifier);
+ unregister_mt_adistance_algorithm(&cxlr->adist_notifier);
+}
+
static int cxl_region_probe(struct device *dev)
{
struct cxl_region *cxlr = to_cxl_region(dev);
@@ -3421,6 +3418,18 @@ out:
if (rc)
return rc;
+ cxlr->memory_notifier.notifier_call = cxl_region_perf_attrs_callback;
+ cxlr->memory_notifier.priority = CXL_CALLBACK_PRI;
+ register_memory_notifier(&cxlr->memory_notifier);
+
+ cxlr->adist_notifier.notifier_call = cxl_region_calculate_adistance;
+ cxlr->adist_notifier.priority = 100;
+ register_mt_adistance_algorithm(&cxlr->adist_notifier);
+
+ rc = devm_add_action_or_reset(&cxlr->dev, shutdown_notifiers, cxlr);
+ if (rc)
+ return rc;
+
switch (cxlr->mode) {
case CXL_DECODER_PMEM:
return devm_cxl_add_pmem_region(cxlr);
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 9afb407d438f..0d8b810a51f0 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -744,6 +744,7 @@ struct cxl_root *find_cxl_root(struct cxl_port *port);
void put_cxl_root(struct cxl_root *cxl_root);
DEFINE_FREE(put_cxl_root, struct cxl_root *, if (_T) put_cxl_root(_T))
+DEFINE_FREE(put_cxl_port, struct cxl_port *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->dev))
int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd);
void cxl_bus_rescan(void);
void cxl_bus_drain(void);
@@ -762,9 +763,10 @@ struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port,
#ifdef CONFIG_PCIEAER_CXL
void cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport);
+void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host);
#else
-static inline void cxl_setup_parent_dport(struct device *host,
- struct cxl_dport *dport) { }
+static inline void cxl_dport_init_ras_reporting(struct cxl_dport *dport,
+ struct device *host) { }
#endif
struct cxl_decoder *to_cxl_decoder(struct device *dev);
@@ -809,7 +811,7 @@ struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port,
int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm,
struct cxl_endpoint_dvsec_info *info);
int devm_cxl_add_passthrough_decoder(struct cxl_port *port);
-int cxl_dvsec_rr_decode(struct device *dev, int dvsec,
+int cxl_dvsec_rr_decode(struct device *dev, struct cxl_port *port,
struct cxl_endpoint_dvsec_info *info);
bool is_cxl_region(struct device *dev);
@@ -889,6 +891,7 @@ int cxl_endpoint_get_perf_coordinates(struct cxl_port *port,
struct access_coordinate *coord);
void cxl_region_perf_data_calculate(struct cxl_region *cxlr,
struct cxl_endpoint_decoder *cxled);
+void cxl_region_shared_upstream_bandwidth_update(struct cxl_region *cxlr);
void cxl_memdev_update_perf(struct cxl_memdev *cxlmd);
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index afb53d058d62..2a25d1957ddb 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -3,11 +3,12 @@
#ifndef __CXL_MEM_H__
#define __CXL_MEM_H__
#include <uapi/linux/cxl_mem.h>
+#include <linux/pci.h>
#include <linux/cdev.h>
#include <linux/uuid.h>
-#include <linux/rcuwait.h>
-#include <linux/cxl-event.h>
#include <linux/node.h>
+#include <cxl/event.h>
+#include <cxl/mailbox.h>
#include "cxl.h"
/* CXL 2.0 8.2.8.5.1.1 Memory Device Status Register */
@@ -397,11 +398,13 @@ enum cxl_devtype {
* struct cxl_dpa_perf - DPA performance property entry
* @dpa_range: range for DPA address
* @coord: QoS performance data (i.e. latency, bandwidth)
+ * @cdat_coord: raw QoS performance data from CDAT
* @qos_class: QoS Class cookies
*/
struct cxl_dpa_perf {
struct range dpa_range;
struct access_coordinate coord[ACCESS_COORDINATE_MAX];
+ struct access_coordinate cdat_coord[ACCESS_COORDINATE_MAX];
int qos_class;
};
@@ -424,6 +427,7 @@ struct cxl_dpa_perf {
* @ram_res: Active Volatile memory capacity configuration
* @serial: PCIe Device Serial Number
* @type: Generic Memory Class device or Vendor Specific Memory device
+ * @cxl_mbox: CXL mailbox context
*/
struct cxl_dev_state {
struct device *dev;
@@ -438,8 +442,14 @@ struct cxl_dev_state {
struct resource ram_res;
u64 serial;
enum cxl_devtype type;
+ struct cxl_mailbox cxl_mbox;
};
+static inline struct cxl_dev_state *mbox_to_cxlds(struct cxl_mailbox *cxl_mbox)
+{
+ return dev_get_drvdata(cxl_mbox->host);
+}
+
/**
* struct cxl_memdev_state - Generic Type-3 Memory Device Class driver data
*
@@ -448,11 +458,8 @@ struct cxl_dev_state {
* the functionality related to that like Identify Memory Device and Get
* Partition Info
* @cxlds: Core driver state common across Type-2 and Type-3 devices
- * @payload_size: Size of space for payload
- * (CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register)
* @lsa_size: Size of Label Storage Area
* (CXL 2.0 8.2.9.5.1.1 Identify Memory Device)
- * @mbox_mutex: Mutex to synchronize mailbox access.
* @firmware_version: Firmware version for the memory device.
* @enabled_cmds: Hardware commands found enabled in CEL.
* @exclusive_cmds: Commands that are kernel-internal only
@@ -470,17 +477,13 @@ struct cxl_dev_state {
* @poison: poison driver state info
* @security: security driver state info
* @fw: firmware upload / activation state
- * @mbox_wait: RCU wait for mbox send completely
- * @mbox_send: @dev specific transport for transmitting mailbox commands
*
* See CXL 3.0 8.2.9.8.2 Capacity Configuration and Label Storage for
* details on capacity parameters.
*/
struct cxl_memdev_state {
struct cxl_dev_state cxlds;
- size_t payload_size;
size_t lsa_size;
- struct mutex mbox_mutex; /* Protects device mailbox and firmware */
char firmware_version[0x10];
DECLARE_BITMAP(enabled_cmds, CXL_MEM_COMMAND_ID_MAX);
DECLARE_BITMAP(exclusive_cmds, CXL_MEM_COMMAND_ID_MAX);
@@ -500,10 +503,6 @@ struct cxl_memdev_state {
struct cxl_poison_state poison;
struct cxl_security_state security;
struct cxl_fw_state fw;
-
- struct rcuwait mbox_wait;
- int (*mbox_send)(struct cxl_memdev_state *mds,
- struct cxl_mbox_cmd *cmd);
};
static inline struct cxl_memdev_state *
@@ -814,7 +813,7 @@ enum {
CXL_PMEM_SEC_PASS_USER,
};
-int cxl_internal_send_cmd(struct cxl_memdev_state *mds,
+int cxl_internal_send_cmd(struct cxl_mailbox *cxl_mbox,
struct cxl_mbox_cmd *cmd);
int cxl_dev_state_identify(struct cxl_memdev_state *mds);
int cxl_await_media_ready(struct cxl_dev_state *cxlds);
diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
index 7de232eaeb17..a9fd5cd5a0d2 100644
--- a/drivers/cxl/mem.c
+++ b/drivers/cxl/mem.c
@@ -109,7 +109,6 @@ static int cxl_mem_probe(struct device *dev)
struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct device *endpoint_parent;
- struct cxl_port *parent_port;
struct cxl_dport *dport;
struct dentry *dentry;
int rc;
@@ -146,7 +145,8 @@ static int cxl_mem_probe(struct device *dev)
if (rc)
return rc;
- parent_port = cxl_mem_find_port(cxlmd, &dport);
+ struct cxl_port *parent_port __free(put_cxl_port) =
+ cxl_mem_find_port(cxlmd, &dport);
if (!parent_port) {
dev_err(dev, "CXL port topology not found\n");
return -ENXIO;
@@ -166,22 +166,19 @@ static int cxl_mem_probe(struct device *dev)
else
endpoint_parent = &parent_port->dev;
- cxl_setup_parent_dport(dev, dport);
+ cxl_dport_init_ras_reporting(dport, dev);
- device_lock(endpoint_parent);
- if (!endpoint_parent->driver) {
- dev_err(dev, "CXL port topology %s not enabled\n",
- dev_name(endpoint_parent));
- rc = -ENXIO;
- goto unlock;
- }
+ scoped_guard(device, endpoint_parent) {
+ if (!endpoint_parent->driver) {
+ dev_err(dev, "CXL port topology %s not enabled\n",
+ dev_name(endpoint_parent));
+ return -ENXIO;
+ }
- rc = devm_cxl_add_endpoint(endpoint_parent, cxlmd, dport);
-unlock:
- device_unlock(endpoint_parent);
- put_device(&parent_port->dev);
- if (rc)
- return rc;
+ rc = devm_cxl_add_endpoint(endpoint_parent, cxlmd, dport);
+ if (rc)
+ return rc;
+ }
/*
* The kernel may be operating out of CXL memory on this device,
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index 4be35dc22202..37164174b5fb 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -11,6 +11,7 @@
#include <linux/pci.h>
#include <linux/aer.h>
#include <linux/io.h>
+#include <cxl/mailbox.h>
#include "cxlmem.h"
#include "cxlpci.h"
#include "cxl.h"
@@ -124,6 +125,7 @@ static irqreturn_t cxl_pci_mbox_irq(int irq, void *id)
u16 opcode;
struct cxl_dev_id *dev_id = id;
struct cxl_dev_state *cxlds = dev_id->cxlds;
+ struct cxl_mailbox *cxl_mbox = &cxlds->cxl_mbox;
struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
if (!cxl_mbox_background_complete(cxlds))
@@ -132,13 +134,13 @@ static irqreturn_t cxl_pci_mbox_irq(int irq, void *id)
reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
opcode = FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_OPCODE_MASK, reg);
if (opcode == CXL_MBOX_OP_SANITIZE) {
- mutex_lock(&mds->mbox_mutex);
+ mutex_lock(&cxl_mbox->mbox_mutex);
if (mds->security.sanitize_node)
mod_delayed_work(system_wq, &mds->security.poll_dwork, 0);
- mutex_unlock(&mds->mbox_mutex);
+ mutex_unlock(&cxl_mbox->mbox_mutex);
} else {
/* short-circuit the wait in __cxl_pci_mbox_send_cmd() */
- rcuwait_wake_up(&mds->mbox_wait);
+ rcuwait_wake_up(&cxl_mbox->mbox_wait);
}
return IRQ_HANDLED;
@@ -152,8 +154,9 @@ static void cxl_mbox_sanitize_work(struct work_struct *work)
struct cxl_memdev_state *mds =
container_of(work, typeof(*mds), security.poll_dwork.work);
struct cxl_dev_state *cxlds = &mds->cxlds;
+ struct cxl_mailbox *cxl_mbox = &cxlds->cxl_mbox;
- mutex_lock(&mds->mbox_mutex);
+ mutex_lock(&cxl_mbox->mbox_mutex);
if (cxl_mbox_background_complete(cxlds)) {
mds->security.poll_tmo_secs = 0;
if (mds->security.sanitize_node)
@@ -167,12 +170,12 @@ static void cxl_mbox_sanitize_work(struct work_struct *work)
mds->security.poll_tmo_secs = min(15 * 60, timeout);
schedule_delayed_work(&mds->security.poll_dwork, timeout * HZ);
}
- mutex_unlock(&mds->mbox_mutex);
+ mutex_unlock(&cxl_mbox->mbox_mutex);
}
/**
* __cxl_pci_mbox_send_cmd() - Execute a mailbox command
- * @mds: The memory device driver data
+ * @cxl_mbox: CXL mailbox context
* @mbox_cmd: Command to send to the memory device.
*
* Context: Any context. Expects mbox_mutex to be held.
@@ -192,17 +195,18 @@ static void cxl_mbox_sanitize_work(struct work_struct *work)
* not need to coordinate with each other. The driver only uses the primary
* mailbox.
*/
-static int __cxl_pci_mbox_send_cmd(struct cxl_memdev_state *mds,
+static int __cxl_pci_mbox_send_cmd(struct cxl_mailbox *cxl_mbox,
struct cxl_mbox_cmd *mbox_cmd)
{
- struct cxl_dev_state *cxlds = &mds->cxlds;
+ struct cxl_dev_state *cxlds = mbox_to_cxlds(cxl_mbox);
+ struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
void __iomem *payload = cxlds->regs.mbox + CXLDEV_MBOX_PAYLOAD_OFFSET;
struct device *dev = cxlds->dev;
u64 cmd_reg, status_reg;
size_t out_len;
int rc;
- lockdep_assert_held(&mds->mbox_mutex);
+ lockdep_assert_held(&cxl_mbox->mbox_mutex);
/*
* Here are the steps from 8.2.8.4 of the CXL 2.0 spec.
@@ -315,10 +319,10 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_memdev_state *mds,
timeout = mbox_cmd->poll_interval_ms;
for (i = 0; i < mbox_cmd->poll_count; i++) {
- if (rcuwait_wait_event_timeout(&mds->mbox_wait,
- cxl_mbox_background_complete(cxlds),
- TASK_UNINTERRUPTIBLE,
- msecs_to_jiffies(timeout)) > 0)
+ if (rcuwait_wait_event_timeout(&cxl_mbox->mbox_wait,
+ cxl_mbox_background_complete(cxlds),
+ TASK_UNINTERRUPTIBLE,
+ msecs_to_jiffies(timeout)) > 0)
break;
}
@@ -360,7 +364,7 @@ success:
*/
size_t n;
- n = min3(mbox_cmd->size_out, mds->payload_size, out_len);
+ n = min3(mbox_cmd->size_out, cxl_mbox->payload_size, out_len);
memcpy_fromio(mbox_cmd->payload_out, payload, n);
mbox_cmd->size_out = n;
} else {
@@ -370,14 +374,14 @@ success:
return 0;
}
-static int cxl_pci_mbox_send(struct cxl_memdev_state *mds,
+static int cxl_pci_mbox_send(struct cxl_mailbox *cxl_mbox,
struct cxl_mbox_cmd *cmd)
{
int rc;
- mutex_lock_io(&mds->mbox_mutex);
- rc = __cxl_pci_mbox_send_cmd(mds, cmd);
- mutex_unlock(&mds->mbox_mutex);
+ mutex_lock_io(&cxl_mbox->mbox_mutex);
+ rc = __cxl_pci_mbox_send_cmd(cxl_mbox, cmd);
+ mutex_unlock(&cxl_mbox->mbox_mutex);
return rc;
}
@@ -385,6 +389,7 @@ static int cxl_pci_mbox_send(struct cxl_memdev_state *mds,
static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds, bool irq_avail)
{
struct cxl_dev_state *cxlds = &mds->cxlds;
+ struct cxl_mailbox *cxl_mbox = &cxlds->cxl_mbox;
const int cap = readl(cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET);
struct device *dev = cxlds->dev;
unsigned long timeout;
@@ -417,8 +422,8 @@ static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds, bool irq_avail)
return -ETIMEDOUT;
}
- mds->mbox_send = cxl_pci_mbox_send;
- mds->payload_size =
+ cxl_mbox->mbox_send = cxl_pci_mbox_send;
+ cxl_mbox->payload_size =
1 << FIELD_GET(CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK, cap);
/*
@@ -428,16 +433,15 @@ static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds, bool irq_avail)
* there's no point in going forward. If the size is too large, there's
* no harm is soft limiting it.
*/
- mds->payload_size = min_t(size_t, mds->payload_size, SZ_1M);
- if (mds->payload_size < 256) {
+ cxl_mbox->payload_size = min_t(size_t, cxl_mbox->payload_size, SZ_1M);
+ if (cxl_mbox->payload_size < 256) {
dev_err(dev, "Mailbox is too small (%zub)",
- mds->payload_size);
+ cxl_mbox->payload_size);
return -ENXIO;
}
- dev_dbg(dev, "Mailbox payload sized %zu", mds->payload_size);
+ dev_dbg(dev, "Mailbox payload sized %zu", cxl_mbox->payload_size);
- rcuwait_init(&mds->mbox_wait);
INIT_DELAYED_WORK(&mds->security.poll_dwork, cxl_mbox_sanitize_work);
/* background command interrupts are optional */
@@ -473,7 +477,6 @@ static bool is_cxl_restricted(struct pci_dev *pdev)
static int cxl_rcrb_get_comp_regs(struct pci_dev *pdev,
struct cxl_register_map *map)
{
- struct cxl_port *port;
struct cxl_dport *dport;
resource_size_t component_reg_phys;
@@ -482,14 +485,12 @@ static int cxl_rcrb_get_comp_regs(struct pci_dev *pdev,
.resource = CXL_RESOURCE_NONE,
};
- port = cxl_pci_find_port(pdev, &dport);
+ struct cxl_port *port __free(put_cxl_port) =
+ cxl_pci_find_port(pdev, &dport);
if (!port)
return -EPROBE_DEFER;
component_reg_phys = cxl_rcd_component_reg_phys(&pdev->dev, dport);
-
- put_device(&port->dev);
-
if (component_reg_phys == CXL_RESOURCE_NONE)
return -ENXIO;
@@ -578,9 +579,10 @@ static void free_event_buf(void *buf)
*/
static int cxl_mem_alloc_event_buf(struct cxl_memdev_state *mds)
{
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
struct cxl_get_event_payload *buf;
- buf = kvmalloc(mds->payload_size, GFP_KERNEL);
+ buf = kvmalloc(cxl_mbox->payload_size, GFP_KERNEL);
if (!buf)
return -ENOMEM;
mds->event.buf = buf;
@@ -653,6 +655,7 @@ static int cxl_event_req_irq(struct cxl_dev_state *cxlds, u8 setting)
static int cxl_event_get_int_policy(struct cxl_memdev_state *mds,
struct cxl_event_interrupt_policy *policy)
{
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
struct cxl_mbox_cmd mbox_cmd = {
.opcode = CXL_MBOX_OP_GET_EVT_INT_POLICY,
.payload_out = policy,
@@ -660,7 +663,7 @@ static int cxl_event_get_int_policy(struct cxl_memdev_state *mds,
};
int rc;
- rc = cxl_internal_send_cmd(mds, &mbox_cmd);
+ rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
if (rc < 0)
dev_err(mds->cxlds.dev,
"Failed to get event interrupt policy : %d", rc);
@@ -671,6 +674,7 @@ static int cxl_event_get_int_policy(struct cxl_memdev_state *mds,
static int cxl_event_config_msgnums(struct cxl_memdev_state *mds,
struct cxl_event_interrupt_policy *policy)
{
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
struct cxl_mbox_cmd mbox_cmd;
int rc;
@@ -687,7 +691,7 @@ static int cxl_event_config_msgnums(struct cxl_memdev_state *mds,
.size_in = sizeof(*policy),
};
- rc = cxl_internal_send_cmd(mds, &mbox_cmd);
+ rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
if (rc < 0) {
dev_err(mds->cxlds.dev, "Failed to set event interrupt policy : %d",
rc);
@@ -786,6 +790,23 @@ static int cxl_event_config(struct pci_host_bridge *host_bridge,
return 0;
}
+static int cxl_pci_type3_init_mailbox(struct cxl_dev_state *cxlds)
+{
+ int rc;
+
+ /*
+ * Fail the init if there's no mailbox. For a type3 this is out of spec.
+ */
+ if (!cxlds->reg_map.device_map.mbox.valid)
+ return -ENODEV;
+
+ rc = cxl_mailbox_init(&cxlds->cxl_mbox, cxlds->dev);
+ if (rc)
+ return rc;
+
+ return 0;
+}
+
static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus);
@@ -846,6 +867,10 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (rc)
dev_dbg(&pdev->dev, "Failed to map RAS capability.\n");
+ rc = cxl_pci_type3_init_mailbox(cxlds);
+ if (rc)
+ return rc;
+
rc = cxl_await_media_ready(cxlds);
if (rc == 0)
cxlds->media_ready = true;
diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c
index 4ef93da22335..a6538a5f5c9f 100644
--- a/drivers/cxl/pmem.c
+++ b/drivers/cxl/pmem.c
@@ -102,13 +102,15 @@ static int cxl_pmem_get_config_size(struct cxl_memdev_state *mds,
struct nd_cmd_get_config_size *cmd,
unsigned int buf_len)
{
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
+
if (sizeof(*cmd) > buf_len)
return -EINVAL;
*cmd = (struct nd_cmd_get_config_size){
.config_size = mds->lsa_size,
.max_xfer =
- mds->payload_size - sizeof(struct cxl_mbox_set_lsa),
+ cxl_mbox->payload_size - sizeof(struct cxl_mbox_set_lsa),
};
return 0;
@@ -118,6 +120,7 @@ static int cxl_pmem_get_config_data(struct cxl_memdev_state *mds,
struct nd_cmd_get_config_data_hdr *cmd,
unsigned int buf_len)
{
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
struct cxl_mbox_get_lsa get_lsa;
struct cxl_mbox_cmd mbox_cmd;
int rc;
@@ -139,7 +142,7 @@ static int cxl_pmem_get_config_data(struct cxl_memdev_state *mds,
.payload_out = cmd->out_buf,
};
- rc = cxl_internal_send_cmd(mds, &mbox_cmd);
+ rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
cmd->status = 0;
return rc;
@@ -149,6 +152,7 @@ static int cxl_pmem_set_config_data(struct cxl_memdev_state *mds,
struct nd_cmd_set_config_hdr *cmd,
unsigned int buf_len)
{
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
struct cxl_mbox_set_lsa *set_lsa;
struct cxl_mbox_cmd mbox_cmd;
int rc;
@@ -175,7 +179,7 @@ static int cxl_pmem_set_config_data(struct cxl_memdev_state *mds,
.size_in = struct_size(set_lsa, data, cmd->in_length),
};
- rc = cxl_internal_send_cmd(mds, &mbox_cmd);
+ rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
/*
* Set "firmware" status (4-packed bytes at the end of the input
@@ -233,15 +237,13 @@ static int detach_nvdimm(struct device *dev, void *data)
if (!is_cxl_nvdimm(dev))
return 0;
- device_lock(dev);
- if (!dev->driver)
- goto out;
-
- cxl_nvd = to_cxl_nvdimm(dev);
- if (cxl_nvd->cxlmd && cxl_nvd->cxlmd->cxl_nvb == data)
- release = true;
-out:
- device_unlock(dev);
+ scoped_guard(device, dev) {
+ if (dev->driver) {
+ cxl_nvd = to_cxl_nvdimm(dev);
+ if (cxl_nvd->cxlmd && cxl_nvd->cxlmd->cxl_nvb == data)
+ release = true;
+ }
+ }
if (release)
device_release_driver(dev);
return 0;
diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
index d7d5d982ce69..861dde65768f 100644
--- a/drivers/cxl/port.c
+++ b/drivers/cxl/port.c
@@ -98,7 +98,7 @@ static int cxl_endpoint_port_probe(struct cxl_port *port)
struct cxl_port *root;
int rc;
- rc = cxl_dvsec_rr_decode(cxlds->dev, cxlds->cxl_dvsec, &info);
+ rc = cxl_dvsec_rr_decode(cxlds->dev, port, &info);
if (rc < 0)
return rc;
diff --git a/drivers/cxl/security.c b/drivers/cxl/security.c
index 21856a3f408e..452d1a9b9148 100644
--- a/drivers/cxl/security.c
+++ b/drivers/cxl/security.c
@@ -14,6 +14,7 @@ static unsigned long cxl_pmem_get_security_flags(struct nvdimm *nvdimm,
{
struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
+ struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox;
struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
unsigned long security_flags = 0;
struct cxl_get_security_output {
@@ -29,7 +30,7 @@ static unsigned long cxl_pmem_get_security_flags(struct nvdimm *nvdimm,
.payload_out = &out,
};
- rc = cxl_internal_send_cmd(mds, &mbox_cmd);
+ rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
if (rc < 0)
return 0;
@@ -70,7 +71,7 @@ static int cxl_pmem_security_change_key(struct nvdimm *nvdimm,
{
struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
- struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
+ struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox;
struct cxl_mbox_cmd mbox_cmd;
struct cxl_set_pass set_pass;
@@ -87,7 +88,7 @@ static int cxl_pmem_security_change_key(struct nvdimm *nvdimm,
.payload_in = &set_pass,
};
- return cxl_internal_send_cmd(mds, &mbox_cmd);
+ return cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
}
static int __cxl_pmem_security_disable(struct nvdimm *nvdimm,
@@ -96,7 +97,7 @@ static int __cxl_pmem_security_disable(struct nvdimm *nvdimm,
{
struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
- struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
+ struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox;
struct cxl_disable_pass dis_pass;
struct cxl_mbox_cmd mbox_cmd;
@@ -112,7 +113,7 @@ static int __cxl_pmem_security_disable(struct nvdimm *nvdimm,
.payload_in = &dis_pass,
};
- return cxl_internal_send_cmd(mds, &mbox_cmd);
+ return cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
}
static int cxl_pmem_security_disable(struct nvdimm *nvdimm,
@@ -131,12 +132,12 @@ static int cxl_pmem_security_freeze(struct nvdimm *nvdimm)
{
struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
- struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
+ struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox;
struct cxl_mbox_cmd mbox_cmd = {
.opcode = CXL_MBOX_OP_FREEZE_SECURITY,
};
- return cxl_internal_send_cmd(mds, &mbox_cmd);
+ return cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
}
static int cxl_pmem_security_unlock(struct nvdimm *nvdimm,
@@ -144,7 +145,7 @@ static int cxl_pmem_security_unlock(struct nvdimm *nvdimm,
{
struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
- struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
+ struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox;
u8 pass[NVDIMM_PASSPHRASE_LEN];
struct cxl_mbox_cmd mbox_cmd;
int rc;
@@ -156,7 +157,7 @@ static int cxl_pmem_security_unlock(struct nvdimm *nvdimm,
.payload_in = pass,
};
- rc = cxl_internal_send_cmd(mds, &mbox_cmd);
+ rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
if (rc < 0)
return rc;
@@ -169,7 +170,7 @@ static int cxl_pmem_security_passphrase_erase(struct nvdimm *nvdimm,
{
struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
- struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
+ struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox;
struct cxl_mbox_cmd mbox_cmd;
struct cxl_pass_erase erase;
int rc;
@@ -185,7 +186,7 @@ static int cxl_pmem_security_passphrase_erase(struct nvdimm *nvdimm,
.payload_in = &erase,
};
- rc = cxl_internal_send_cmd(mds, &mbox_cmd);
+ rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
if (rc < 0)
return rc;
diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c
index 518eaa073b2b..b360dca2c69e 100644
--- a/drivers/firewire/core-cdev.c
+++ b/drivers/firewire/core-cdev.c
@@ -1911,7 +1911,6 @@ static __poll_t fw_device_op_poll(struct file *file, poll_table * pt)
const struct file_operations fw_device_ops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.open = fw_device_op_open,
.read = fw_device_op_read,
.unlocked_ioctl = fw_device_op_ioctl,
diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c
index 69c15135371c..88c5c4ff4bb6 100644
--- a/drivers/firmware/arm_scmi/driver.c
+++ b/drivers/firmware/arm_scmi/driver.c
@@ -2886,7 +2886,6 @@ static ssize_t reset_all_on_write(struct file *filp, const char __user *buf,
static const struct file_operations fops_reset_counts = {
.owner = THIS_MODULE,
.open = simple_open,
- .llseek = no_llseek,
.write = reset_all_on_write,
};
diff --git a/drivers/firmware/arm_scmi/raw_mode.c b/drivers/firmware/arm_scmi/raw_mode.c
index 130d13e9cd6b..9e89a6a763da 100644
--- a/drivers/firmware/arm_scmi/raw_mode.c
+++ b/drivers/firmware/arm_scmi/raw_mode.c
@@ -950,7 +950,6 @@ static const struct file_operations scmi_dbg_raw_mode_reset_fops = {
.open = scmi_dbg_raw_mode_open,
.release = scmi_dbg_raw_mode_release,
.write = scmi_dbg_raw_mode_reset_write,
- .llseek = no_llseek,
.owner = THIS_MODULE,
};
@@ -960,7 +959,6 @@ static const struct file_operations scmi_dbg_raw_mode_message_fops = {
.read = scmi_dbg_raw_mode_message_read,
.write = scmi_dbg_raw_mode_message_write,
.poll = scmi_dbg_raw_mode_message_poll,
- .llseek = no_llseek,
.owner = THIS_MODULE,
};
@@ -977,7 +975,6 @@ static const struct file_operations scmi_dbg_raw_mode_message_async_fops = {
.read = scmi_dbg_raw_mode_message_read,
.write = scmi_dbg_raw_mode_message_async_write,
.poll = scmi_dbg_raw_mode_message_poll,
- .llseek = no_llseek,
.owner = THIS_MODULE,
};
@@ -1001,7 +998,6 @@ static const struct file_operations scmi_dbg_raw_mode_notification_fops = {
.release = scmi_dbg_raw_mode_release,
.read = scmi_test_dbg_raw_mode_notif_read,
.poll = scmi_test_dbg_raw_mode_notif_poll,
- .llseek = no_llseek,
.owner = THIS_MODULE,
};
@@ -1025,7 +1021,6 @@ static const struct file_operations scmi_dbg_raw_mode_errors_fops = {
.release = scmi_dbg_raw_mode_release,
.read = scmi_test_dbg_raw_mode_errors_read,
.poll = scmi_test_dbg_raw_mode_errors_poll,
- .llseek = no_llseek,
.owner = THIS_MODULE,
};
diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c
index 97bafb5f7038..0c17bdd388e1 100644
--- a/drivers/firmware/efi/capsule-loader.c
+++ b/drivers/firmware/efi/capsule-loader.c
@@ -309,7 +309,6 @@ static const struct file_operations efi_capsule_fops = {
.open = efi_capsule_open,
.write = efi_capsule_write,
.release = efi_capsule_release,
- .llseek = no_llseek,
};
static struct miscdevice efi_capsule_misc = {
diff --git a/drivers/firmware/efi/test/efi_test.c b/drivers/firmware/efi/test/efi_test.c
index 47d67bb0a516..9e2628728aad 100644
--- a/drivers/firmware/efi/test/efi_test.c
+++ b/drivers/firmware/efi/test/efi_test.c
@@ -750,7 +750,6 @@ static const struct file_operations efi_test_fops = {
.unlocked_ioctl = efi_test_ioctl,
.open = efi_test_open,
.release = efi_test_close,
- .llseek = no_llseek,
};
static struct miscdevice efi_test_dev = {
diff --git a/drivers/firmware/turris-mox-rwtm.c b/drivers/firmware/turris-mox-rwtm.c
index 525ebdc7ded5..f3bc0d427825 100644
--- a/drivers/firmware/turris-mox-rwtm.c
+++ b/drivers/firmware/turris-mox-rwtm.c
@@ -386,7 +386,6 @@ static const struct file_operations do_sign_fops = {
.open = rwtm_debug_open,
.read = do_sign_read,
.write = do_sign_write,
- .llseek = no_llseek,
};
static void rwtm_debugfs_release(void *root)
diff --git a/drivers/gnss/core.c b/drivers/gnss/core.c
index 48f2ee0f78c4..883ef86ad3fc 100644
--- a/drivers/gnss/core.c
+++ b/drivers/gnss/core.c
@@ -206,7 +206,6 @@ static const struct file_operations gnss_fops = {
.read = gnss_read,
.write = gnss_write,
.poll = gnss_poll,
- .llseek = no_llseek,
};
static struct class *gnss_class;
diff --git a/drivers/gpio/gpio-mockup.c b/drivers/gpio/gpio-mockup.c
index 455eecf6380e..d39c6618bade 100644
--- a/drivers/gpio/gpio-mockup.c
+++ b/drivers/gpio/gpio-mockup.c
@@ -347,7 +347,6 @@ static const struct file_operations gpio_mockup_debugfs_ops = {
.open = gpio_mockup_debugfs_open,
.read = gpio_mockup_debugfs_read,
.write = gpio_mockup_debugfs_write,
- .llseek = no_llseek,
.release = single_release,
};
diff --git a/drivers/gpio/gpio-sloppy-logic-analyzer.c b/drivers/gpio/gpio-sloppy-logic-analyzer.c
index aed6d1f6cfc3..07e0d7180579 100644
--- a/drivers/gpio/gpio-sloppy-logic-analyzer.c
+++ b/drivers/gpio/gpio-sloppy-logic-analyzer.c
@@ -217,7 +217,6 @@ static const struct file_operations fops_trigger = {
.owner = THIS_MODULE,
.open = trigger_open,
.write = trigger_write,
- .llseek = no_llseek,
.release = single_release,
};
diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c
index 5aac59de0d76..78c9d9ed3d68 100644
--- a/drivers/gpio/gpiolib-cdev.c
+++ b/drivers/gpio/gpiolib-cdev.c
@@ -2842,7 +2842,6 @@ static const struct file_operations gpio_fileops = {
.poll = lineinfo_watch_poll,
.read = lineinfo_watch_read,
.owner = THIS_MODULE,
- .llseek = no_llseek,
.unlocked_ioctl = gpio_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = gpio_ioctl_compat,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index dcd59040c449..9b1e0ede05a4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1083,10 +1083,6 @@ struct amdgpu_device {
struct amdgpu_virt virt;
- /* link all shadow bo */
- struct list_head shadow_list;
- struct mutex shadow_list_lock;
-
/* record hw reset is performed */
bool has_hw_reset;
u8 reset_magic[AMDGPU_RESET_MAGIC_NUM];
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
index 57bda66e85ef..2ca127173135 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
@@ -511,7 +511,7 @@ static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *h
return -EINVAL;
}
- /* udpate aca bank to aca source error_cache first */
+ /* update aca bank to aca source error_cache first */
ret = aca_banks_update(adev, smu_type, handler_aca_log_bank_error, qctx, NULL);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 1254a43ec96b..3bc0cbf45bc5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -950,28 +950,30 @@ static void unlock_spi_csq_mutexes(struct amdgpu_device *adev)
* @inst: xcc's instance number on a multi-XCC setup
*/
static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
- int *wave_cnt, int *vmid, uint32_t inst)
+ struct kfd_cu_occupancy *queue_cnt, uint32_t inst)
{
int pipe_idx;
int queue_slot;
unsigned int reg_val;
-
+ unsigned int wave_cnt;
/*
* Program GRBM with appropriate MEID, PIPEID, QUEUEID and VMID
* parameters to read out waves in flight. Get VMID if there are
* non-zero waves in flight.
*/
- *vmid = 0xFF;
- *wave_cnt = 0;
pipe_idx = queue_idx / adev->gfx.mec.num_queue_per_pipe;
queue_slot = queue_idx % adev->gfx.mec.num_queue_per_pipe;
- soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0, inst);
- reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, inst, mmSPI_CSQ_WF_ACTIVE_COUNT_0) +
- queue_slot);
- *wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK;
- if (*wave_cnt != 0)
- *vmid = (RREG32_SOC15(GC, inst, mmCP_HQD_VMID) &
- CP_HQD_VMID__VMID_MASK) >> CP_HQD_VMID__VMID__SHIFT;
+ soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0, GET_INST(GC, inst));
+ reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
+ mmSPI_CSQ_WF_ACTIVE_COUNT_0) + queue_slot);
+ wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK;
+ if (wave_cnt != 0) {
+ queue_cnt->wave_cnt += wave_cnt;
+ queue_cnt->doorbell_off =
+ (RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_DOORBELL_CONTROL) &
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK) >>
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+ }
}
/**
@@ -981,9 +983,8 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
* or more queues running and submitting waves to compute units.
*
* @adev: Handle of device from which to get number of waves in flight
- * @pasid: Identifies the process for which this query call is invoked
- * @pasid_wave_cnt: Output parameter updated with number of waves in flight that
- * belong to process with given pasid
+ * @cu_occupancy: Array that gets filled with wave_cnt and doorbell offset
+ * for comparison later.
* @max_waves_per_cu: Output parameter updated with maximum number of waves
* possible per Compute Unit
* @inst: xcc's instance number on a multi-XCC setup
@@ -1011,34 +1012,28 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
* number of waves that are in flight for the queue at specified index. The
* index ranges from 0 to 7.
*
- * If non-zero waves are in flight, read CP_HQD_VMID register to obtain VMID
- * of the wave(s).
+ * If non-zero waves are in flight, store the corresponding doorbell offset
+ * of the queue, along with the wave count.
*
- * Determine if VMID from above step maps to pasid provided as parameter. If
- * it matches agrregate the wave count. That the VMID will not match pasid is
- * a normal condition i.e. a device is expected to support multiple queues
- * from multiple proceses.
+ * Determine if the queue belongs to the process by comparing the doorbell
+ * offset against the process's queues. If it matches, aggregate the wave
+ * count for the process.
*
* Reading registers referenced above involves programming GRBM appropriately
*/
-void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
- int *pasid_wave_cnt, int *max_waves_per_cu, uint32_t inst)
+void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev,
+ struct kfd_cu_occupancy *cu_occupancy,
+ int *max_waves_per_cu, uint32_t inst)
{
int qidx;
- int vmid;
int se_idx;
- int sh_idx;
int se_cnt;
- int sh_cnt;
- int wave_cnt;
int queue_map;
- int pasid_tmp;
int max_queue_cnt;
- int vmid_wave_cnt = 0;
DECLARE_BITMAP(cp_queue_bitmap, AMDGPU_MAX_QUEUES);
lock_spi_csq_mutexes(adev);
- soc15_grbm_select(adev, 1, 0, 0, 0, inst);
+ soc15_grbm_select(adev, 1, 0, 0, 0, GET_INST(GC, inst));
/*
* Iterate through the shader engines and arrays of the device
@@ -1048,51 +1043,38 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
AMDGPU_MAX_QUEUES);
max_queue_cnt = adev->gfx.mec.num_pipe_per_mec *
adev->gfx.mec.num_queue_per_pipe;
- sh_cnt = adev->gfx.config.max_sh_per_se;
se_cnt = adev->gfx.config.max_shader_engines;
for (se_idx = 0; se_idx < se_cnt; se_idx++) {
- for (sh_idx = 0; sh_idx < sh_cnt; sh_idx++) {
+ amdgpu_gfx_select_se_sh(adev, se_idx, 0, 0xffffffff, inst);
+ queue_map = RREG32_SOC15(GC, GET_INST(GC, inst), mmSPI_CSQ_WF_ACTIVE_STATUS);
+
+ /*
+ * Assumption: queue map encodes following schema: four
+ * pipes per each micro-engine, with each pipe mapping
+ * eight queues. This schema is true for GFX9 devices
+ * and must be verified for newer device families
+ */
+ for (qidx = 0; qidx < max_queue_cnt; qidx++) {
+ /* Skip qeueus that are not associated with
+ * compute functions
+ */
+ if (!test_bit(qidx, cp_queue_bitmap))
+ continue;
- amdgpu_gfx_select_se_sh(adev, se_idx, sh_idx, 0xffffffff, inst);
- queue_map = RREG32_SOC15(GC, inst, mmSPI_CSQ_WF_ACTIVE_STATUS);
+ if (!(queue_map & (1 << qidx)))
+ continue;
- /*
- * Assumption: queue map encodes following schema: four
- * pipes per each micro-engine, with each pipe mapping
- * eight queues. This schema is true for GFX9 devices
- * and must be verified for newer device families
- */
- for (qidx = 0; qidx < max_queue_cnt; qidx++) {
-
- /* Skip qeueus that are not associated with
- * compute functions
- */
- if (!test_bit(qidx, cp_queue_bitmap))
- continue;
-
- if (!(queue_map & (1 << qidx)))
- continue;
-
- /* Get number of waves in flight and aggregate them */
- get_wave_count(adev, qidx, &wave_cnt, &vmid,
- inst);
- if (wave_cnt != 0) {
- pasid_tmp =
- RREG32(SOC15_REG_OFFSET(OSSSYS, inst,
- mmIH_VMID_0_LUT) + vmid);
- if (pasid_tmp == pasid)
- vmid_wave_cnt += wave_cnt;
- }
- }
+ /* Get number of waves in flight and aggregate them */
+ get_wave_count(adev, qidx, &cu_occupancy[qidx],
+ inst);
}
}
amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, inst);
- soc15_grbm_select(adev, 0, 0, 0, 0, inst);
+ soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, inst));
unlock_spi_csq_mutexes(adev);
/* Update the output parameters and return */
- *pasid_wave_cnt = vmid_wave_cnt;
*max_waves_per_cu = adev->gfx.cu_info.simd_per_cu *
adev->gfx.cu_info.max_waves_per_simd;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
index 988c50ac3be0..b6a91a552aa4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
@@ -52,8 +52,9 @@ bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
uint8_t vmid, uint16_t *p_pasid);
void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev,
uint32_t vmid, uint64_t page_table_base);
-void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
- int *pasid_wave_cnt, int *max_waves_per_cu, uint32_t inst);
+void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev,
+ struct kfd_cu_occupancy *cu_occupancy,
+ int *max_waves_per_cu, uint32_t inst);
void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev,
uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr,
uint32_t inst);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 4afef5b46c7d..ce5ca304dba9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1499,7 +1499,7 @@ static int amdgpu_amdkfd_gpuvm_pin_bo(struct amdgpu_bo *bo, u32 domain)
}
}
- ret = amdgpu_bo_pin_restricted(bo, domain, 0, 0);
+ ret = amdgpu_bo_pin(bo, domain);
if (ret)
pr_err("Error in Pinning BO to domain: %d\n", domain);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
index 42e64bce661e..45affc02548c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
@@ -87,8 +87,9 @@ static bool check_atom_bios(uint8_t *bios, size_t size)
* part of the system bios. On boot, the system bios puts a
* copy of the igp rom at the start of vram if a discrete card is
* present.
+ * For SR-IOV, the vbios image is also put in VRAM in the VF.
*/
-static bool igp_read_bios_from_vram(struct amdgpu_device *adev)
+static bool amdgpu_read_bios_from_vram(struct amdgpu_device *adev)
{
uint8_t __iomem *bios;
resource_size_t vram_base;
@@ -284,10 +285,6 @@ static bool amdgpu_atrm_get_bios(struct amdgpu_device *adev)
acpi_status status;
bool found = false;
- /* ATRM is for the discrete card only */
- if (adev->flags & AMD_IS_APU)
- return false;
-
/* ATRM is for on-platform devices only */
if (dev_is_removable(&adev->pdev->dev))
return false;
@@ -343,11 +340,8 @@ static inline bool amdgpu_atrm_get_bios(struct amdgpu_device *adev)
static bool amdgpu_read_disabled_bios(struct amdgpu_device *adev)
{
- if (adev->flags & AMD_IS_APU)
- return igp_read_bios_from_vram(adev);
- else
- return (!adev->asic_funcs || !adev->asic_funcs->read_disabled_bios) ?
- false : amdgpu_asic_read_disabled_bios(adev);
+ return (!adev->asic_funcs || !adev->asic_funcs->read_disabled_bios) ?
+ false : amdgpu_asic_read_disabled_bios(adev);
}
#ifdef CONFIG_ACPI
@@ -414,7 +408,36 @@ static inline bool amdgpu_acpi_vfct_bios(struct amdgpu_device *adev)
}
#endif
-bool amdgpu_get_bios(struct amdgpu_device *adev)
+static bool amdgpu_get_bios_apu(struct amdgpu_device *adev)
+{
+ if (amdgpu_acpi_vfct_bios(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from VFCT\n");
+ goto success;
+ }
+
+ if (amdgpu_read_bios_from_vram(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from VRAM BAR\n");
+ goto success;
+ }
+
+ if (amdgpu_read_bios(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from ROM BAR\n");
+ goto success;
+ }
+
+ if (amdgpu_read_platform_bios(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from platform\n");
+ goto success;
+ }
+
+ dev_err(adev->dev, "Unable to locate a BIOS ROM\n");
+ return false;
+
+success:
+ return true;
+}
+
+static bool amdgpu_get_bios_dgpu(struct amdgpu_device *adev)
{
if (amdgpu_atrm_get_bios(adev)) {
dev_info(adev->dev, "Fetched VBIOS from ATRM\n");
@@ -426,7 +449,8 @@ bool amdgpu_get_bios(struct amdgpu_device *adev)
goto success;
}
- if (igp_read_bios_from_vram(adev)) {
+ /* this is required for SR-IOV */
+ if (amdgpu_read_bios_from_vram(adev)) {
dev_info(adev->dev, "Fetched VBIOS from VRAM BAR\n");
goto success;
}
@@ -455,10 +479,24 @@ bool amdgpu_get_bios(struct amdgpu_device *adev)
return false;
success:
- adev->is_atom_fw = adev->asic_type >= CHIP_VEGA10;
return true;
}
+bool amdgpu_get_bios(struct amdgpu_device *adev)
+{
+ bool found;
+
+ if (adev->flags & AMD_IS_APU)
+ found = amdgpu_get_bios_apu(adev);
+ else
+ found = amdgpu_get_bios_dgpu(adev);
+
+ if (found)
+ adev->is_atom_fw = adev->asic_type >= CHIP_VEGA10;
+
+ return found;
+}
+
/* helper function for soc15 and onwards to read bios from rom */
bool amdgpu_soc15_read_bios_from_rom(struct amdgpu_device *adev,
u8 *bios, u32 length_bytes)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index f4628412dac4..c2394c8b4d6b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4107,9 +4107,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
spin_lock_init(&adev->mm_stats.lock);
spin_lock_init(&adev->wb.lock);
- INIT_LIST_HEAD(&adev->shadow_list);
- mutex_init(&adev->shadow_list_lock);
-
INIT_LIST_HEAD(&adev->reset_list);
INIT_LIST_HEAD(&adev->ras_list);
@@ -5030,80 +5027,6 @@ static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
}
/**
- * amdgpu_device_recover_vram - Recover some VRAM contents
- *
- * @adev: amdgpu_device pointer
- *
- * Restores the contents of VRAM buffers from the shadows in GTT. Used to
- * restore things like GPUVM page tables after a GPU reset where
- * the contents of VRAM might be lost.
- *
- * Returns:
- * 0 on success, negative error code on failure.
- */
-static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
-{
- struct dma_fence *fence = NULL, *next = NULL;
- struct amdgpu_bo *shadow;
- struct amdgpu_bo_vm *vmbo;
- long r = 1, tmo;
-
- if (amdgpu_sriov_runtime(adev))
- tmo = msecs_to_jiffies(8000);
- else
- tmo = msecs_to_jiffies(100);
-
- dev_info(adev->dev, "recover vram bo from shadow start\n");
- mutex_lock(&adev->shadow_list_lock);
- list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
- /* If vm is compute context or adev is APU, shadow will be NULL */
- if (!vmbo->shadow)
- continue;
- shadow = vmbo->shadow;
-
- /* No need to recover an evicted BO */
- if (!shadow->tbo.resource ||
- shadow->tbo.resource->mem_type != TTM_PL_TT ||
- shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
- shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
- continue;
-
- r = amdgpu_bo_restore_shadow(shadow, &next);
- if (r)
- break;
-
- if (fence) {
- tmo = dma_fence_wait_timeout(fence, false, tmo);
- dma_fence_put(fence);
- fence = next;
- if (tmo == 0) {
- r = -ETIMEDOUT;
- break;
- } else if (tmo < 0) {
- r = tmo;
- break;
- }
- } else {
- fence = next;
- }
- }
- mutex_unlock(&adev->shadow_list_lock);
-
- if (fence)
- tmo = dma_fence_wait_timeout(fence, false, tmo);
- dma_fence_put(fence);
-
- if (r < 0 || tmo <= 0) {
- dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
- return -EIO;
- }
-
- dev_info(adev->dev, "recover vram bo from shadow done\n");
- return 0;
-}
-
-
-/**
* amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
*
* @adev: amdgpu_device pointer
@@ -5165,12 +5088,8 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
if (r)
return r;
- if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
+ if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST)
amdgpu_inc_vram_lost(adev);
- r = amdgpu_device_recover_vram(adev);
- }
- if (r)
- return r;
/* need to be called during full access so we can't do it later like
* bare-metal does.
@@ -5569,9 +5488,7 @@ out:
}
}
- if (!r)
- r = amdgpu_device_recover_vram(tmp_adev);
- else
+ if (r)
tmp_adev->asic_reset_res = r;
}
@@ -6189,7 +6106,7 @@ bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
p2p_addressable = !(adev->gmc.aper_base & address_mask ||
aper_limit & address_mask);
}
- return is_large_bar && p2p_access && p2p_addressable;
+ return pcie_p2p && is_large_bar && p2p_access && p2p_addressable;
#else
return false;
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 092ec11258cd..b119d27271c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -233,6 +233,7 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
}
if (!adev->enable_virtual_display) {
+ new_abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
r = amdgpu_bo_pin(new_abo,
amdgpu_display_supported_domains(adev, new_abo->flags));
if (unlikely(r != 0)) {
@@ -1474,7 +1475,7 @@ bool amdgpu_display_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
if ((!(mode->flags & DRM_MODE_FLAG_INTERLACE)) &&
((amdgpu_encoder->underscan_type == UNDERSCAN_ON) ||
((amdgpu_encoder->underscan_type == UNDERSCAN_AUTO) &&
- connector->display_info.is_hdmi &&
+ connector && connector->display_info.is_hdmi &&
amdgpu_display_is_hdtv_mode(mode)))) {
if (amdgpu_encoder->underscan_hborder != 0)
amdgpu_crtc->h_border = amdgpu_encoder->underscan_hborder;
@@ -1759,6 +1760,7 @@ int amdgpu_display_resume_helper(struct amdgpu_device *adev)
r = amdgpu_bo_reserve(aobj, true);
if (r == 0) {
+ aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
if (r != 0)
dev_err(adev->dev, "Failed to pin cursor BO (%d)\n", r);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index f57411ed2dc2..81d9877c8735 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -117,9 +117,10 @@
* - 3.56.0 - Update IB start address and size alignment for decode and encode
* - 3.57.0 - Compute tunneling on GFX10+
* - 3.58.0 - Add GFX12 DCC support
+ * - 3.59.0 - Cleared VRAM
*/
#define KMS_DRIVER_MAJOR 3
-#define KMS_DRIVER_MINOR 58
+#define KMS_DRIVER_MINOR 59
#define KMS_DRIVER_PATCHLEVEL 0
/*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 0e617dff8765..1a5df8b94661 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -43,8 +43,6 @@
#include "amdgpu_hmm.h"
#include "amdgpu_xgmi.h"
-static const struct drm_gem_object_funcs amdgpu_gem_object_funcs;
-
static vm_fault_t amdgpu_gem_fault(struct vm_fault *vmf)
{
struct ttm_buffer_object *bo = vmf->vma->vm_private_data;
@@ -87,11 +85,11 @@ static const struct vm_operations_struct amdgpu_gem_vm_ops = {
static void amdgpu_gem_object_free(struct drm_gem_object *gobj)
{
- struct amdgpu_bo *robj = gem_to_amdgpu_bo(gobj);
+ struct amdgpu_bo *aobj = gem_to_amdgpu_bo(gobj);
- if (robj) {
- amdgpu_hmm_unregister(robj);
- amdgpu_bo_unref(&robj);
+ if (aobj) {
+ amdgpu_hmm_unregister(aobj);
+ ttm_bo_put(&aobj->tbo);
}
}
@@ -126,7 +124,6 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
bo = &ubo->bo;
*obj = &bo->tbo.base;
- (*obj)->funcs = &amdgpu_gem_object_funcs;
return 0;
}
@@ -295,7 +292,7 @@ static int amdgpu_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_str
return drm_gem_ttm_mmap(obj, vma);
}
-static const struct drm_gem_object_funcs amdgpu_gem_object_funcs = {
+const struct drm_gem_object_funcs amdgpu_gem_object_funcs = {
.free = amdgpu_gem_object_free,
.open = amdgpu_gem_object_open,
.close = amdgpu_gem_object_close,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
index f30264782ba2..3a8f57900a3a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
@@ -33,6 +33,8 @@
#define AMDGPU_GEM_DOMAIN_MAX 0x3
#define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_bo, tbo.base)
+extern const struct drm_gem_object_funcs amdgpu_gem_object_funcs;
+
unsigned long amdgpu_gem_timeout(uint64_t timeout_ns);
/*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index ad6bf5d4e0a9..16f2605ac50b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -107,8 +107,11 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
/*
* Do the coredump immediately after a job timeout to get a very
* close dump/snapshot/representation of GPU's current error status
+ * Skip it for SRIOV, since VF FLR will be triggered by host driver
+ * before job timeout
*/
- amdgpu_job_core_dump(adev, job);
+ if (!amdgpu_sriov_vf(adev))
+ amdgpu_job_core_dump(adev, job);
if (amdgpu_gpu_recovery &&
amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index e32161f6b67a..44819cdba7fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -77,24 +77,6 @@ static void amdgpu_bo_user_destroy(struct ttm_buffer_object *tbo)
amdgpu_bo_destroy(tbo);
}
-static void amdgpu_bo_vm_destroy(struct ttm_buffer_object *tbo)
-{
- struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
- struct amdgpu_bo *shadow_bo = ttm_to_amdgpu_bo(tbo), *bo;
- struct amdgpu_bo_vm *vmbo;
-
- bo = shadow_bo->parent;
- vmbo = to_amdgpu_bo_vm(bo);
- /* in case amdgpu_device_recover_vram got NULL of bo->parent */
- if (!list_empty(&vmbo->shadow_list)) {
- mutex_lock(&adev->shadow_list_lock);
- list_del_init(&vmbo->shadow_list);
- mutex_unlock(&adev->shadow_list_lock);
- }
-
- amdgpu_bo_destroy(tbo);
-}
-
/**
* amdgpu_bo_is_amdgpu_bo - check if the buffer object is an &amdgpu_bo
* @bo: buffer object to be checked
@@ -108,8 +90,7 @@ static void amdgpu_bo_vm_destroy(struct ttm_buffer_object *tbo)
bool amdgpu_bo_is_amdgpu_bo(struct ttm_buffer_object *bo)
{
if (bo->destroy == &amdgpu_bo_destroy ||
- bo->destroy == &amdgpu_bo_user_destroy ||
- bo->destroy == &amdgpu_bo_vm_destroy)
+ bo->destroy == &amdgpu_bo_user_destroy)
return true;
return false;
@@ -583,6 +564,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
if (bo == NULL)
return -ENOMEM;
drm_gem_private_object_init(adev_to_drm(adev), &bo->tbo.base, size);
+ bo->tbo.base.funcs = &amdgpu_gem_object_funcs;
bo->vm_bo = NULL;
bo->preferred_domains = bp->preferred_domain ? bp->preferred_domain :
bp->domain;
@@ -723,52 +705,6 @@ int amdgpu_bo_create_vm(struct amdgpu_device *adev,
}
/**
- * amdgpu_bo_add_to_shadow_list - add a BO to the shadow list
- *
- * @vmbo: BO that will be inserted into the shadow list
- *
- * Insert a BO to the shadow list.
- */
-void amdgpu_bo_add_to_shadow_list(struct amdgpu_bo_vm *vmbo)
-{
- struct amdgpu_device *adev = amdgpu_ttm_adev(vmbo->bo.tbo.bdev);
-
- mutex_lock(&adev->shadow_list_lock);
- list_add_tail(&vmbo->shadow_list, &adev->shadow_list);
- vmbo->shadow->parent = amdgpu_bo_ref(&vmbo->bo);
- vmbo->shadow->tbo.destroy = &amdgpu_bo_vm_destroy;
- mutex_unlock(&adev->shadow_list_lock);
-}
-
-/**
- * amdgpu_bo_restore_shadow - restore an &amdgpu_bo shadow
- *
- * @shadow: &amdgpu_bo shadow to be restored
- * @fence: dma_fence associated with the operation
- *
- * Copies a buffer object's shadow content back to the object.
- * This is used for recovering a buffer from its shadow in case of a gpu
- * reset where vram context may be lost.
- *
- * Returns:
- * 0 for success or a negative error code on failure.
- */
-int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, struct dma_fence **fence)
-
-{
- struct amdgpu_device *adev = amdgpu_ttm_adev(shadow->tbo.bdev);
- struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
- uint64_t shadow_addr, parent_addr;
-
- shadow_addr = amdgpu_bo_gpu_offset(shadow);
- parent_addr = amdgpu_bo_gpu_offset(shadow->parent);
-
- return amdgpu_copy_buffer(ring, shadow_addr, parent_addr,
- amdgpu_bo_size(shadow), NULL, fence,
- true, false, 0);
-}
-
-/**
* amdgpu_bo_kmap - map an &amdgpu_bo buffer object
* @bo: &amdgpu_bo buffer object to be mapped
* @ptr: kernel virtual address to be returned
@@ -851,7 +787,7 @@ struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo)
if (bo == NULL)
return NULL;
- ttm_bo_get(&bo->tbo);
+ drm_gem_object_get(&bo->tbo.base);
return bo;
}
@@ -863,40 +799,30 @@ struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo)
*/
void amdgpu_bo_unref(struct amdgpu_bo **bo)
{
- struct ttm_buffer_object *tbo;
-
if ((*bo) == NULL)
return;
- tbo = &((*bo)->tbo);
- ttm_bo_put(tbo);
+ drm_gem_object_put(&(*bo)->tbo.base);
*bo = NULL;
}
/**
- * amdgpu_bo_pin_restricted - pin an &amdgpu_bo buffer object
+ * amdgpu_bo_pin - pin an &amdgpu_bo buffer object
* @bo: &amdgpu_bo buffer object to be pinned
* @domain: domain to be pinned to
- * @min_offset: the start of requested address range
- * @max_offset: the end of requested address range
*
- * Pins the buffer object according to requested domain and address range. If
- * the memory is unbound gart memory, binds the pages into gart table. Adjusts
- * pin_count and pin_size accordingly.
+ * Pins the buffer object according to requested domain. If the memory is
+ * unbound gart memory, binds the pages into gart table. Adjusts pin_count and
+ * pin_size accordingly.
*
* Pinning means to lock pages in memory along with keeping them at a fixed
* offset. It is required when a buffer can not be moved, for example, when
* a display buffer is being scanned out.
*
- * Compared with amdgpu_bo_pin(), this function gives more flexibility on
- * where to pin a buffer if there are specific restrictions on where a buffer
- * must be located.
- *
* Returns:
* 0 for success or a negative error code on failure.
*/
-int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
- u64 min_offset, u64 max_offset)
+int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct ttm_operation_ctx ctx = { false, false };
@@ -905,9 +831,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
return -EPERM;
- if (WARN_ON_ONCE(min_offset > max_offset))
- return -EINVAL;
-
/* Check domain to be pinned to against preferred domains */
if (bo->preferred_domains & domain)
domain = bo->preferred_domains & domain;
@@ -933,14 +856,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
return -EINVAL;
ttm_bo_pin(&bo->tbo);
-
- if (max_offset != 0) {
- u64 domain_start = amdgpu_ttm_domain_start(adev,
- mem_type);
- WARN_ON_ONCE(max_offset <
- (amdgpu_bo_gpu_offset(bo) - domain_start));
- }
-
return 0;
}
@@ -957,17 +872,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
amdgpu_bo_placement_from_domain(bo, domain);
for (i = 0; i < bo->placement.num_placement; i++) {
- unsigned int fpfn, lpfn;
-
- fpfn = min_offset >> PAGE_SHIFT;
- lpfn = max_offset >> PAGE_SHIFT;
-
- if (fpfn > bo->placements[i].fpfn)
- bo->placements[i].fpfn = fpfn;
- if (!bo->placements[i].lpfn ||
- (lpfn && lpfn < bo->placements[i].lpfn))
- bo->placements[i].lpfn = lpfn;
-
if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
bo->placements[i].mem_type == TTM_PL_VRAM)
bo->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
@@ -994,24 +898,6 @@ error:
}
/**
- * amdgpu_bo_pin - pin an &amdgpu_bo buffer object
- * @bo: &amdgpu_bo buffer object to be pinned
- * @domain: domain to be pinned to
- *
- * A simple wrapper to amdgpu_bo_pin_restricted().
- * Provides a simpler API for buffers that do not have any strict restrictions
- * on where a buffer must be located.
- *
- * Returns:
- * 0 for success or a negative error code on failure.
- */
-int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain)
-{
- bo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
- return amdgpu_bo_pin_restricted(bo, domain, 0, 0);
-}
-
-/**
* amdgpu_bo_unpin - unpin an &amdgpu_bo buffer object
* @bo: &amdgpu_bo buffer object to be unpinned
*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index d7e27957013f..717e47b46167 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -136,8 +136,6 @@ struct amdgpu_bo_user {
struct amdgpu_bo_vm {
struct amdgpu_bo bo;
- struct amdgpu_bo *shadow;
- struct list_head shadow_list;
struct amdgpu_vm_bo_base entries[];
};
@@ -275,22 +273,6 @@ static inline bool amdgpu_bo_encrypted(struct amdgpu_bo *bo)
return bo->flags & AMDGPU_GEM_CREATE_ENCRYPTED;
}
-/**
- * amdgpu_bo_shadowed - check if the BO is shadowed
- *
- * @bo: BO to be tested.
- *
- * Returns:
- * NULL if not shadowed or else return a BO pointer.
- */
-static inline struct amdgpu_bo *amdgpu_bo_shadowed(struct amdgpu_bo *bo)
-{
- if (bo->tbo.type == ttm_bo_type_kernel)
- return to_amdgpu_bo_vm(bo)->shadow;
-
- return NULL;
-}
-
bool amdgpu_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain);
@@ -322,8 +304,6 @@ void amdgpu_bo_kunmap(struct amdgpu_bo *bo);
struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo);
void amdgpu_bo_unref(struct amdgpu_bo **bo);
int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain);
-int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
- u64 min_offset, u64 max_offset);
void amdgpu_bo_unpin(struct amdgpu_bo *bo);
int amdgpu_bo_init(struct amdgpu_device *adev);
void amdgpu_bo_fini(struct amdgpu_device *adev);
@@ -349,9 +329,6 @@ u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo);
u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo);
void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
struct amdgpu_mem_stats *stats);
-void amdgpu_bo_add_to_shadow_list(struct amdgpu_bo_vm *vmbo);
-int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow,
- struct dma_fence **fence);
uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev,
uint32_t domain);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 189574d53ebd..0b28b2cf1517 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -2853,7 +2853,7 @@ static int psp_load_non_psp_fw(struct psp_context *psp)
if (ret)
return ret;
- /* Start rlc autoload after psp recieved all the gfx firmware */
+ /* Start rlc autoload after psp received all the gfx firmware */
if (psp->autoload_supported && ucode->ucode_id == (amdgpu_sriov_vf(adev) ?
adev->virt.autoload_ucode_id : AMDGPU_UCODE_ID_RLC_G)) {
ret = psp_rlc_autoload_start(psp);
@@ -3425,9 +3425,11 @@ int psp_init_sos_microcode(struct psp_context *psp, const char *chip_name)
const struct psp_firmware_header_v1_2 *sos_hdr_v1_2;
const struct psp_firmware_header_v1_3 *sos_hdr_v1_3;
const struct psp_firmware_header_v2_0 *sos_hdr_v2_0;
- int err = 0;
+ const struct psp_firmware_header_v2_1 *sos_hdr_v2_1;
+ int fw_index, fw_bin_count, start_index = 0;
+ const struct psp_fw_bin_desc *fw_bin;
uint8_t *ucode_array_start_addr;
- int fw_index = 0;
+ int err = 0;
err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, "amdgpu/%s_sos.bin", chip_name);
if (err)
@@ -3478,15 +3480,30 @@ int psp_init_sos_microcode(struct psp_context *psp, const char *chip_name)
case 2:
sos_hdr_v2_0 = (const struct psp_firmware_header_v2_0 *)adev->psp.sos_fw->data;
- if (le32_to_cpu(sos_hdr_v2_0->psp_fw_bin_count) >= UCODE_MAX_PSP_PACKAGING) {
+ fw_bin_count = le32_to_cpu(sos_hdr_v2_0->psp_fw_bin_count);
+
+ if (fw_bin_count >= UCODE_MAX_PSP_PACKAGING) {
dev_err(adev->dev, "packed SOS count exceeds maximum limit\n");
err = -EINVAL;
goto out;
}
- for (fw_index = 0; fw_index < le32_to_cpu(sos_hdr_v2_0->psp_fw_bin_count); fw_index++) {
- err = parse_sos_bin_descriptor(psp,
- &sos_hdr_v2_0->psp_fw_bin[fw_index],
+ if (sos_hdr_v2_0->header.header_version_minor == 1) {
+ sos_hdr_v2_1 = (const struct psp_firmware_header_v2_1 *)adev->psp.sos_fw->data;
+
+ fw_bin = sos_hdr_v2_1->psp_fw_bin;
+
+ if (psp_is_aux_sos_load_required(psp))
+ start_index = le32_to_cpu(sos_hdr_v2_1->psp_aux_fw_bin_index);
+ else
+ fw_bin_count -= le32_to_cpu(sos_hdr_v2_1->psp_aux_fw_bin_index);
+
+ } else {
+ fw_bin = sos_hdr_v2_0->psp_fw_bin;
+ }
+
+ for (fw_index = start_index; fw_index < fw_bin_count; fw_index++) {
+ err = parse_sos_bin_descriptor(psp, fw_bin + fw_index,
sos_hdr_v2_0);
if (err)
goto out;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index 74a96516c913..e8abbbcb4326 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -138,6 +138,7 @@ struct psp_funcs {
int (*vbflash_stat)(struct psp_context *psp);
int (*fatal_error_recovery_quirk)(struct psp_context *psp);
bool (*get_ras_capability)(struct psp_context *psp);
+ bool (*is_aux_sos_load_required)(struct psp_context *psp);
};
struct ta_funcs {
@@ -464,6 +465,9 @@ struct amdgpu_psp_funcs {
((psp)->funcs->fatal_error_recovery_quirk ? \
(psp)->funcs->fatal_error_recovery_quirk((psp)) : 0)
+#define psp_is_aux_sos_load_required(psp) \
+ ((psp)->funcs->is_aux_sos_load_required ? (psp)->funcs->is_aux_sos_load_required((psp)) : 0)
+
extern const struct amd_ip_funcs psp_ip_funcs;
extern const struct amdgpu_ip_block_version psp_v3_1_ip_block;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 61a2f386d9fb..1a1395c5fff1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -882,7 +882,7 @@ int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev,
if (ret)
return ret;
- /* gfx block ras dsiable cmd must send to ras-ta */
+ /* gfx block ras disable cmd must send to ras-ta */
if (head->block == AMDGPU_RAS_BLOCK__GFX)
con->features |= BIT(head->block);
@@ -3468,6 +3468,11 @@ init_ras_enabled_flag:
/* aca is disabled by default */
adev->aca.is_enabled = false;
+
+ /* bad page feature is not applicable to specific app platform */
+ if (adev->gmc.is_app_apu &&
+ amdgpu_ip_version(adev, UMC_HWIP, 0) == IP_VERSION(12, 0, 0))
+ amdgpu_bad_page_threshold = 0;
}
static void amdgpu_ras_counte_dw(struct work_struct *work)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index aab8077e5098..f28f6b4ba765 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -58,7 +58,7 @@
#define EEPROM_I2C_MADDR_4 0x40000
/*
- * The 2 macros bellow represent the actual size in bytes that
+ * The 2 macros below represent the actual size in bytes that
* those entities occupy in the EEPROM memory.
* RAS_TABLE_RECORD_SIZE is different than sizeof(eeprom_table_record) which
* uses uint64 to store 6b fields such as retired_page.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index bdf1ef825d89..c586ab4c911b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -260,6 +260,36 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
return 0;
}
+/**
+ * amdgpu_sync_kfd - sync to KFD fences
+ *
+ * @sync: sync object to add KFD fences to
+ * @resv: reservation object with KFD fences
+ *
+ * Extract all KFD fences and add them to the sync object.
+ */
+int amdgpu_sync_kfd(struct amdgpu_sync *sync, struct dma_resv *resv)
+{
+ struct dma_resv_iter cursor;
+ struct dma_fence *f;
+ int r = 0;
+
+ dma_resv_iter_begin(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP);
+ dma_resv_for_each_fence_unlocked(&cursor, f) {
+ void *fence_owner = amdgpu_sync_get_owner(f);
+
+ if (fence_owner != AMDGPU_FENCE_OWNER_KFD)
+ continue;
+
+ r = amdgpu_sync_fence(sync, f);
+ if (r)
+ break;
+ }
+ dma_resv_iter_end(&cursor);
+
+ return r;
+}
+
/* Free the entry back to the slab */
static void amdgpu_sync_entry_free(struct amdgpu_sync_entry *e)
{
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
index cf1e9e858efd..e3272dce798d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
@@ -51,6 +51,7 @@ int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f);
int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
struct dma_resv *resv, enum amdgpu_sync_mode mode,
void *owner);
+int amdgpu_sync_kfd(struct amdgpu_sync *sync, struct dma_resv *resv);
struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
struct amdgpu_ring *ring);
struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index b8bc7fa8c375..74adb983ab03 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1970,7 +1970,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
(unsigned int)(gtt_size / (1024 * 1024)));
- /* Initiailize doorbell pool on PCI BAR */
+ /* Initialize doorbell pool on PCI BAR */
r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_DOORBELL, adev->doorbell.size / PAGE_SIZE);
if (r) {
DRM_ERROR("Failed initializing doorbell heap.\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
index 5bc37acd3981..4e23419b92d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -136,6 +136,14 @@ struct psp_firmware_header_v2_0 {
struct psp_fw_bin_desc psp_fw_bin[];
};
+/* version_major=2, version_minor=1 */
+struct psp_firmware_header_v2_1 {
+ struct common_firmware_header header;
+ uint32_t psp_fw_bin_count;
+ uint32_t psp_aux_fw_bin_index;
+ struct psp_fw_bin_desc psp_fw_bin[];
+};
+
/* version_major=1, version_minor=0 */
struct ta_firmware_header_v1_0 {
struct common_firmware_header header;
@@ -426,6 +434,7 @@ union amdgpu_firmware_header {
struct psp_firmware_header_v1_1 psp_v1_1;
struct psp_firmware_header_v1_3 psp_v1_3;
struct psp_firmware_header_v2_0 psp_v2_0;
+ struct psp_firmware_header_v2_0 psp_v2_1;
struct ta_firmware_header_v1_0 ta;
struct ta_firmware_header_v2_0 ta_v2_0;
struct gfx_firmware_header_v1_0 gfx;
@@ -447,7 +456,7 @@ union amdgpu_firmware_header {
uint8_t raw[0x100];
};
-#define UCODE_MAX_PSP_PACKAGING ((sizeof(union amdgpu_firmware_header) - sizeof(struct common_firmware_header) - 4) / sizeof(struct psp_fw_bin_desc))
+#define UCODE_MAX_PSP_PACKAGING (((sizeof(union amdgpu_firmware_header) - sizeof(struct common_firmware_header) - 4) / sizeof(struct psp_fw_bin_desc)) * 2)
/*
* fw loading support
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
index e5f508d34ed8..d4c2afafbb73 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
@@ -338,6 +338,7 @@ static int amdgpu_vkms_prepare_fb(struct drm_plane *plane,
else
domain = AMDGPU_GEM_DOMAIN_VRAM;
+ rbo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
r = amdgpu_bo_pin(rbo, domain);
if (unlikely(r != 0)) {
if (r != -ERESTARTSYS)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 2452dfa6314f..6005280f5f38 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -465,7 +465,6 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
{
uint64_t new_vm_generation = amdgpu_vm_generation(adev, vm);
struct amdgpu_vm_bo_base *bo_base;
- struct amdgpu_bo *shadow;
struct amdgpu_bo *bo;
int r;
@@ -486,16 +485,10 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
spin_unlock(&vm->status_lock);
bo = bo_base->bo;
- shadow = amdgpu_bo_shadowed(bo);
r = validate(param, bo);
if (r)
return r;
- if (shadow) {
- r = validate(param, shadow);
- if (r)
- return r;
- }
if (bo->tbo.type != ttm_bo_type_kernel) {
amdgpu_vm_bo_moved(bo_base);
@@ -1176,6 +1169,12 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
AMDGPU_SYNC_EQ_OWNER, vm);
if (r)
goto error_free;
+ if (bo) {
+ r = amdgpu_sync_kfd(&sync, bo->tbo.base.resv);
+ if (r)
+ goto error_free;
+ }
+
} else {
struct drm_gem_object *obj = &bo->tbo.base;
@@ -2149,10 +2148,6 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
{
struct amdgpu_vm_bo_base *bo_base;
- /* shadow bo doesn't have bo base, its validation needs its parent */
- if (bo->parent && (amdgpu_bo_shadowed(bo->parent) == bo))
- bo = bo->parent;
-
for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
struct amdgpu_vm *vm = bo_base->vm;
@@ -2482,7 +2477,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
root_bo = amdgpu_bo_ref(&root->bo);
r = amdgpu_bo_reserve(root_bo, true);
if (r) {
- amdgpu_bo_unref(&root->shadow);
amdgpu_bo_unref(&root_bo);
goto error_free_delayed;
}
@@ -2575,11 +2569,6 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
vm->last_update = dma_fence_get_stub();
vm->is_compute_context = true;
- /* Free the shadow bo for compute VM */
- amdgpu_bo_unref(&to_amdgpu_bo_vm(vm->root.bo)->shadow);
-
- goto unreserve_bo;
-
unreserve_bo:
amdgpu_bo_unreserve(vm->root.bo);
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index a076f43097e4..f78a0434a48f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -383,14 +383,6 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
if (r)
return r;
- if (vmbo->shadow) {
- struct amdgpu_bo *shadow = vmbo->shadow;
-
- r = ttm_bo_validate(&shadow->tbo, &shadow->placement, &ctx);
- if (r)
- return r;
- }
-
if (!drm_dev_enter(adev_to_drm(adev), &idx))
return -ENODEV;
@@ -448,10 +440,7 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
int32_t xcp_id)
{
struct amdgpu_bo_param bp;
- struct amdgpu_bo *bo;
- struct dma_resv *resv;
unsigned int num_entries;
- int r;
memset(&bp, 0, sizeof(bp));
@@ -484,42 +473,7 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
if (vm->root.bo)
bp.resv = vm->root.bo->tbo.base.resv;
- r = amdgpu_bo_create_vm(adev, &bp, vmbo);
- if (r)
- return r;
-
- bo = &(*vmbo)->bo;
- if (vm->is_compute_context || (adev->flags & AMD_IS_APU)) {
- (*vmbo)->shadow = NULL;
- return 0;
- }
-
- if (!bp.resv)
- WARN_ON(dma_resv_lock(bo->tbo.base.resv,
- NULL));
- resv = bp.resv;
- memset(&bp, 0, sizeof(bp));
- bp.size = amdgpu_vm_pt_size(adev, level);
- bp.domain = AMDGPU_GEM_DOMAIN_GTT;
- bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
- bp.type = ttm_bo_type_kernel;
- bp.resv = bo->tbo.base.resv;
- bp.bo_ptr_size = sizeof(struct amdgpu_bo);
- bp.xcp_id_plus1 = xcp_id + 1;
-
- r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow);
-
- if (!resv)
- dma_resv_unlock(bo->tbo.base.resv);
-
- if (r) {
- amdgpu_bo_unref(&bo);
- return r;
- }
-
- amdgpu_bo_add_to_shadow_list(*vmbo);
-
- return 0;
+ return amdgpu_bo_create_vm(adev, &bp, vmbo);
}
/**
@@ -569,7 +523,6 @@ static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev,
return 0;
error_free_pt:
- amdgpu_bo_unref(&pt->shadow);
amdgpu_bo_unref(&pt_bo);
return r;
}
@@ -581,17 +534,10 @@ error_free_pt:
*/
static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
{
- struct amdgpu_bo *shadow;
-
if (!entry->bo)
return;
entry->bo->vm_bo = NULL;
- shadow = amdgpu_bo_shadowed(entry->bo);
- if (shadow) {
- ttm_bo_set_bulk_move(&shadow->tbo, NULL);
- amdgpu_bo_unref(&shadow);
- }
ttm_bo_set_bulk_move(&entry->bo->tbo, NULL);
spin_lock(&entry->vm->status_lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
index 4772fba33285..46d9fb433ab2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
@@ -35,16 +35,7 @@
*/
static int amdgpu_vm_sdma_map_table(struct amdgpu_bo_vm *table)
{
- int r;
-
- r = amdgpu_ttm_alloc_gart(&table->bo.tbo);
- if (r)
- return r;
-
- if (table->shadow)
- r = amdgpu_ttm_alloc_gart(&table->shadow->tbo);
-
- return r;
+ return amdgpu_ttm_alloc_gart(&table->bo.tbo);
}
/* Allocate a new job for @count PTE updates */
@@ -265,17 +256,13 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,
if (!p->pages_addr) {
/* set page commands needed */
- if (vmbo->shadow)
- amdgpu_vm_sdma_set_ptes(p, vmbo->shadow, pe, addr,
- count, incr, flags);
amdgpu_vm_sdma_set_ptes(p, bo, pe, addr, count,
incr, flags);
return 0;
}
/* copy commands needed */
- ndw -= p->adev->vm_manager.vm_pte_funcs->copy_pte_num_dw *
- (vmbo->shadow ? 2 : 1);
+ ndw -= p->adev->vm_manager.vm_pte_funcs->copy_pte_num_dw;
/* for padding */
ndw -= 7;
@@ -290,8 +277,6 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,
pte[i] |= flags;
}
- if (vmbo->shadow)
- amdgpu_vm_sdma_copy_ptes(p, vmbo->shadow, pe, nptes);
amdgpu_vm_sdma_copy_ptes(p, bo, pe, nptes);
pe += nptes * 8;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
index 90138bc5f03d..32775260556f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
@@ -180,6 +180,6 @@ amdgpu_get_next_xcp(struct amdgpu_xcp_mgr *xcp_mgr, int *from)
#define for_each_xcp(xcp_mgr, xcp, i) \
for (i = 0, xcp = amdgpu_get_next_xcp(xcp_mgr, &i); xcp; \
- xcp = amdgpu_get_next_xcp(xcp_mgr, &i))
+ ++i, xcp = amdgpu_get_next_xcp(xcp_mgr, &i))
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
index 26e2188101e7..5e8833e4fed2 100644
--- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
+++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
@@ -94,8 +94,6 @@ static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev,
case AMDGPU_RING_TYPE_VCN_ENC:
case AMDGPU_RING_TYPE_VCN_JPEG:
ip_blk = AMDGPU_XCP_VCN;
- if (aqua_vanjaram_xcp_vcn_shared(adev))
- inst_mask = 1 << (inst_idx * 2);
break;
default:
DRM_ERROR("Not support ring type %d!", ring->funcs->type);
@@ -105,6 +103,8 @@ static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev,
for (xcp_id = 0; xcp_id < adev->xcp_mgr->num_xcps; xcp_id++) {
if (adev->xcp_mgr->xcp[xcp_id].ip[ip_blk].inst_mask & inst_mask) {
ring->xcp_id = xcp_id;
+ dev_dbg(adev->dev, "ring:%s xcp_id :%u", ring->name,
+ ring->xcp_id);
if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
adev->gfx.enforce_isolation[xcp_id].xcp_id = xcp_id;
break;
@@ -394,38 +394,31 @@ static int __aqua_vanjaram_get_xcp_ip_info(struct amdgpu_xcp_mgr *xcp_mgr, int x
struct amdgpu_xcp_ip *ip)
{
struct amdgpu_device *adev = xcp_mgr->adev;
+ int num_sdma, num_vcn, num_shared_vcn, num_xcp;
int num_xcc_xcp, num_sdma_xcp, num_vcn_xcp;
- int num_sdma, num_vcn;
num_sdma = adev->sdma.num_instances;
num_vcn = adev->vcn.num_vcn_inst;
+ num_shared_vcn = 1;
+
+ num_xcc_xcp = adev->gfx.num_xcc_per_xcp;
+ num_xcp = NUM_XCC(adev->gfx.xcc_mask) / num_xcc_xcp;
switch (xcp_mgr->mode) {
case AMDGPU_SPX_PARTITION_MODE:
- num_sdma_xcp = num_sdma;
- num_vcn_xcp = num_vcn;
- break;
case AMDGPU_DPX_PARTITION_MODE:
- num_sdma_xcp = num_sdma / 2;
- num_vcn_xcp = num_vcn / 2;
- break;
case AMDGPU_TPX_PARTITION_MODE:
- num_sdma_xcp = num_sdma / 3;
- num_vcn_xcp = num_vcn / 3;
- break;
case AMDGPU_QPX_PARTITION_MODE:
- num_sdma_xcp = num_sdma / 4;
- num_vcn_xcp = num_vcn / 4;
- break;
case AMDGPU_CPX_PARTITION_MODE:
- num_sdma_xcp = 2;
- num_vcn_xcp = num_vcn ? 1 : 0;
+ num_sdma_xcp = DIV_ROUND_UP(num_sdma, num_xcp);
+ num_vcn_xcp = DIV_ROUND_UP(num_vcn, num_xcp);
break;
default:
return -EINVAL;
}
- num_xcc_xcp = adev->gfx.num_xcc_per_xcp;
+ if (num_vcn && num_xcp > num_vcn)
+ num_shared_vcn = num_xcp / num_vcn;
switch (ip_id) {
case AMDGPU_XCP_GFXHUB:
@@ -441,7 +434,8 @@ static int __aqua_vanjaram_get_xcp_ip_info(struct amdgpu_xcp_mgr *xcp_mgr, int x
ip->ip_funcs = &sdma_v4_4_2_xcp_funcs;
break;
case AMDGPU_XCP_VCN:
- ip->inst_mask = XCP_INST_MASK(num_vcn_xcp, xcp_id);
+ ip->inst_mask =
+ XCP_INST_MASK(num_vcn_xcp, xcp_id / num_shared_vcn);
/* TODO : Assign IP funcs */
break;
default:
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index 742adbc460c9..70c1399f738d 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -1881,6 +1881,7 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc,
return r;
if (!atomic) {
+ abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM);
if (unlikely(r != 0)) {
amdgpu_bo_unreserve(abo);
@@ -2401,6 +2402,7 @@ static int dce_v10_0_crtc_cursor_set2(struct drm_crtc *crtc,
return ret;
}
+ aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
amdgpu_bo_unreserve(aobj);
if (ret) {
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index 8d46ebadfa46..f154c24499c8 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -1931,6 +1931,7 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc,
return r;
if (!atomic) {
+ abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM);
if (unlikely(r != 0)) {
amdgpu_bo_unreserve(abo);
@@ -2485,6 +2486,7 @@ static int dce_v11_0_crtc_cursor_set2(struct drm_crtc *crtc,
return ret;
}
+ aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
amdgpu_bo_unreserve(aobj);
if (ret) {
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
index f08dc6a3886f..a7fcb135827f 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
@@ -1861,6 +1861,7 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
return r;
if (!atomic) {
+ abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM);
if (unlikely(r != 0)) {
amdgpu_bo_unreserve(abo);
@@ -2321,6 +2322,7 @@ static int dce_v6_0_crtc_cursor_set2(struct drm_crtc *crtc,
return ret;
}
+ aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
amdgpu_bo_unreserve(aobj);
if (ret) {
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index a6a3adf2ae13..77ac3f114d24 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -1828,6 +1828,7 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc,
return r;
if (!atomic) {
+ abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM);
if (unlikely(r != 0)) {
amdgpu_bo_unreserve(abo);
@@ -2320,6 +2321,7 @@ static int dce_v8_0_crtc_cursor_set2(struct drm_crtc *crtc,
return ret;
}
+ aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
amdgpu_bo_unreserve(aobj);
if (ret) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
index d1357c01eb39..47b47d21f464 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
@@ -202,12 +202,16 @@ static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_12[] = {
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ)
};
-static const struct soc15_reg_golden golden_settings_gc_12_0[] = {
+static const struct soc15_reg_golden golden_settings_gc_12_0_rev0[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_MEM_CONFIG, 0x0000000f, 0x0000000f),
SOC15_REG_GOLDEN_VALUE(GC, 0, regCB_HW_CONTROL_1, 0x03000000, 0x03000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL5, 0x00000070, 0x00000020)
};
+static const struct soc15_reg_golden golden_settings_gc_12_0[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_MEM_CONFIG, 0x00008000, 0x00008000),
+};
+
#define DEFAULT_SH_MEM_CONFIG \
((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
(SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
@@ -3495,10 +3499,14 @@ static void gfx_v12_0_init_golden_registers(struct amdgpu_device *adev)
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(12, 0, 0):
case IP_VERSION(12, 0, 1):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_12_0,
+ (const u32)ARRAY_SIZE(golden_settings_gc_12_0));
+
if (adev->rev_id == 0)
soc15_program_register_sequence(adev,
- golden_settings_gc_12_0,
- (const u32)ARRAY_SIZE(golden_settings_gc_12_0));
+ golden_settings_gc_12_0_rev0,
+ (const u32)ARRAY_SIZE(golden_settings_gc_12_0_rev0));
break;
default:
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 408e5600bb61..c100845409f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -1701,7 +1701,15 @@ static void gfx_v9_4_3_xcc_cp_compute_enable(struct amdgpu_device *adev,
WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_MEC_CNTL, 0);
} else {
WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_MEC_CNTL,
- (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
+ (CP_MEC_CNTL__MEC_INVALIDATE_ICACHE_MASK |
+ CP_MEC_CNTL__MEC_ME1_PIPE0_RESET_MASK |
+ CP_MEC_CNTL__MEC_ME1_PIPE1_RESET_MASK |
+ CP_MEC_CNTL__MEC_ME1_PIPE2_RESET_MASK |
+ CP_MEC_CNTL__MEC_ME1_PIPE3_RESET_MASK |
+ CP_MEC_CNTL__MEC_ME2_PIPE0_RESET_MASK |
+ CP_MEC_CNTL__MEC_ME2_PIPE1_RESET_MASK |
+ CP_MEC_CNTL__MEC_ME1_HALT_MASK |
+ CP_MEC_CNTL__MEC_ME2_HALT_MASK));
adev->gfx.kiq[xcc_id].ring.sched.ready = false;
}
udelay(50);
@@ -2240,6 +2248,8 @@ static int gfx_v9_4_3_xcc_cp_resume(struct amdgpu_device *adev, int xcc_id)
r = gfx_v9_4_3_xcc_cp_compute_load_microcode(adev, xcc_id);
if (r)
return r;
+ } else {
+ gfx_v9_4_3_xcc_cp_compute_enable(adev, false, xcc_id);
}
r = gfx_v9_4_3_xcc_kiq_resume(adev, xcc_id);
@@ -2299,12 +2309,6 @@ static int gfx_v9_4_3_cp_resume(struct amdgpu_device *adev)
return 0;
}
-static void gfx_v9_4_3_xcc_cp_enable(struct amdgpu_device *adev, bool enable,
- int xcc_id)
-{
- gfx_v9_4_3_xcc_cp_compute_enable(adev, enable, xcc_id);
-}
-
static void gfx_v9_4_3_xcc_fini(struct amdgpu_device *adev, int xcc_id)
{
if (amdgpu_gfx_disable_kcq(adev, xcc_id))
@@ -2336,7 +2340,7 @@ static void gfx_v9_4_3_xcc_fini(struct amdgpu_device *adev, int xcc_id)
}
gfx_v9_4_3_xcc_kcq_fini_register(adev, xcc_id);
- gfx_v9_4_3_xcc_cp_enable(adev, false, xcc_id);
+ gfx_v9_4_3_xcc_cp_compute_enable(adev, false, xcc_id);
}
static int gfx_v9_4_3_hw_init(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c
index 6c1891889c4d..d4f72e47ae9e 100644
--- a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c
@@ -153,7 +153,7 @@ static void imu_v11_0_setup(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16, imu_reg_val);
}
- //disble imu Rtavfs, SmsRepair, DfllBTC, and ClkB
+ //disable imu Rtavfs, SmsRepair, DfllBTC, and ClkB
imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_10);
imu_reg_val |= 0x10007;
WREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_10, imu_reg_val);
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index ee91ff9e52a2..231a3d490ea8 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -161,7 +161,7 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
int api_status_off)
{
union MESAPI__QUERY_MES_STATUS mes_status_pkt;
- signed long timeout = 3000000; /* 3000 ms */
+ signed long timeout = 2100000; /* 2100 ms */
struct amdgpu_device *adev = mes->adev;
struct amdgpu_ring *ring = &mes->ring[0];
struct MES_API_STATUS *api_status;
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
index e499b2857a01..8d27421689c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
@@ -146,7 +146,7 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
int api_status_off)
{
union MESAPI__QUERY_MES_STATUS mes_status_pkt;
- signed long timeout = 3000000; /* 3000 ms */
+ signed long timeout = 2100000; /* 2100 ms */
struct amdgpu_device *adev = mes->adev;
struct amdgpu_ring *ring = &mes->ring[pipe];
spinlock_t *ring_lock = &mes->ring_lock[pipe];
@@ -479,6 +479,11 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes,
union MESAPI__MISC misc_pkt;
int pipe;
+ if (mes->adev->enable_uni_mes)
+ pipe = AMDGPU_MES_KIQ_PIPE;
+ else
+ pipe = AMDGPU_MES_SCHED_PIPE;
+
memset(&misc_pkt, 0, sizeof(misc_pkt));
misc_pkt.header.type = MES_API_TYPE_SCHEDULER;
@@ -513,6 +518,7 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes,
misc_pkt.wait_reg_mem.reg_offset2 = input->wrm_reg.reg1;
break;
case MES_MISC_OP_SET_SHADER_DEBUGGER:
+ pipe = AMDGPU_MES_SCHED_PIPE;
misc_pkt.opcode = MESAPI_MISC__SET_SHADER_DEBUGGER;
misc_pkt.set_shader_debugger.process_context_addr =
input->set_shader_debugger.process_context_addr;
@@ -530,11 +536,6 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes,
return -EINVAL;
}
- if (mes->adev->enable_uni_mes)
- pipe = AMDGPU_MES_KIQ_PIPE;
- else
- pipe = AMDGPU_MES_SCHED_PIPE;
-
return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
&misc_pkt, sizeof(misc_pkt),
offsetof(union MESAPI__MISC, api_status));
@@ -608,6 +609,7 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe)
mes_set_hw_res_pkt.disable_mes_log = 1;
mes_set_hw_res_pkt.use_different_vmid_compute = 1;
mes_set_hw_res_pkt.enable_reg_active_poll = 1;
+ mes_set_hw_res_pkt.enable_level_process_quantum_check = 1;
/*
* Keep oversubscribe timer for sdma . When we have unmapped doorbell
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
index fa479dfa1ec1..739fce4fa8fd 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
@@ -365,7 +365,7 @@ static void nbio_v2_3_enable_aspm(struct amdgpu_device *adev,
data &= ~PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
} else {
- /* Disbale ASPM L1 */
+ /* Disable ASPM L1 */
data &= ~PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK;
/* Disable ASPM TxL0s */
data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK;
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
index 1251ee38a676..51e470e8d67d 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
@@ -81,6 +81,8 @@ MODULE_FIRMWARE("amdgpu/psp_14_0_4_ta.bin");
/* memory training timeout define */
#define MEM_TRAIN_SEND_MSG_TIMEOUT_US 3000000
+#define regMP1_PUB_SCRATCH0 0x3b10090
+
static int psp_v13_0_init_microcode(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
@@ -807,6 +809,20 @@ static bool psp_v13_0_get_ras_capability(struct psp_context *psp)
}
}
+static bool psp_v13_0_is_aux_sos_load_required(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+ u32 pmfw_ver;
+
+ if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6))
+ return false;
+
+ /* load 4e version of sos if pmfw version less than 85.115.0 */
+ pmfw_ver = RREG32(regMP1_PUB_SCRATCH0 / 4);
+
+ return (pmfw_ver < 0x557300);
+}
+
static const struct psp_funcs psp_v13_0_funcs = {
.init_microcode = psp_v13_0_init_microcode,
.wait_for_bootloader = psp_v13_0_wait_for_bootloader_steady_state,
@@ -830,6 +846,7 @@ static const struct psp_funcs psp_v13_0_funcs = {
.vbflash_stat = psp_v13_0_vbflash_status,
.fatal_error_recovery_quirk = psp_v13_0_fatal_error_recovery_quirk,
.get_ras_capability = psp_v13_0_get_ras_capability,
+ .is_aux_sos_load_required = psp_v13_0_is_aux_sos_load_required,
};
void psp_v13_0_set_psp_funcs(struct psp_context *psp)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index aa637541da58..e65194fe94af 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -710,7 +710,7 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
upper_32_bits(wptr_gpu_addr));
wptr_poll_cntl = RREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i]);
if (ring->use_pollmem) {
- /*wptr polling is not enogh fast, directly clean the wptr register */
+ /*wptr polling is not enough fast, directly clean the wptr register */
WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0);
wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
SDMA0_GFX_RB_WPTR_POLL_CNTL,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
index cfd8e183ad50..a8763496aed3 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
@@ -1080,13 +1080,16 @@ static void sdma_v7_0_vm_copy_pte(struct amdgpu_ib *ib,
unsigned bytes = count * 8;
ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) |
- SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+ SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
+ SDMA_PKT_COPY_LINEAR_HEADER_CPV(1);
+
ib->ptr[ib->length_dw++] = bytes - 1;
ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
ib->ptr[ib->length_dw++] = lower_32_bits(src);
ib->ptr[ib->length_dw++] = upper_32_bits(src);
ib->ptr[ib->length_dw++] = lower_32_bits(pe);
ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ ib->ptr[ib->length_dw++] = 0;
}
@@ -1744,7 +1747,7 @@ static void sdma_v7_0_set_buffer_funcs(struct amdgpu_device *adev)
}
static const struct amdgpu_vm_pte_funcs sdma_v7_0_vm_pte_funcs = {
- .copy_pte_num_dw = 7,
+ .copy_pte_num_dw = 8,
.copy_pte = sdma_v7_0_vm_copy_pte,
.write_pte = sdma_v7_0_vm_write_pte,
.set_pte_pde = sdma_v7_0_vm_set_pte_pde,
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c
index e4e30b9d481b..c04fdd2d5b38 100644
--- a/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c
@@ -60,7 +60,7 @@ static void smuio_v9_0_get_clock_gating_state(struct amdgpu_device *adev, u64 *f
{
u32 data;
- /* CGTT_ROM_CLK_CTRL0 is not availabe for APUs */
+ /* CGTT_ROM_CLK_CTRL0 is not available for APUs */
if (adev->flags & AMD_IS_APU)
return;
diff --git a/drivers/gpu/drm/amd/amdgpu/soc24.c b/drivers/gpu/drm/amd/amdgpu/soc24.c
index b0c3678cfb31..fd4c3d4f8387 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc24.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc24.c
@@ -250,13 +250,6 @@ static void soc24_program_aspm(struct amdgpu_device *adev)
adev->nbio.funcs->program_aspm(adev);
}
-static void soc24_enable_doorbell_aperture(struct amdgpu_device *adev,
- bool enable)
-{
- adev->nbio.funcs->enable_doorbell_aperture(adev, enable);
- adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, enable);
-}
-
const struct amdgpu_ip_block_version soc24_common_ip_block = {
.type = AMD_IP_BLOCK_TYPE_COMMON,
.major = 1,
@@ -454,6 +447,11 @@ static int soc24_common_late_init(void *handle)
if (amdgpu_sriov_vf(adev))
xgpu_nv_mailbox_get_irq(adev);
+ /* Enable selfring doorbell aperture late because doorbell BAR
+ * aperture will change if resize BAR successfully in gmc sw_init.
+ */
+ adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, true);
+
return 0;
}
@@ -491,7 +489,7 @@ static int soc24_common_hw_init(void *handle)
adev->df.funcs->hw_init(adev);
/* enable the doorbell aperture */
- soc24_enable_doorbell_aperture(adev, true);
+ adev->nbio.funcs->enable_doorbell_aperture(adev, true);
return 0;
}
@@ -500,8 +498,13 @@ static int soc24_common_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- /* disable the doorbell aperture */
- soc24_enable_doorbell_aperture(adev, false);
+ /* Disable the doorbell aperture and selfring doorbell aperture
+ * separately in hw_fini because soc21_enable_doorbell_aperture
+ * has been removed and there is no need to delay disabling
+ * selfring doorbell.
+ */
+ adev->nbio.funcs->enable_doorbell_aperture(adev, false);
+ adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, false);
if (amdgpu_sriov_vf(adev))
xgpu_nv_mailbox_put_irq(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
index b1fd226b7efb..9d4f5352a62c 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
@@ -1395,170 +1395,6 @@ static void vcn_v4_0_5_unified_ring_set_wptr(struct amdgpu_ring *ring)
}
}
-static int vcn_v4_0_5_limit_sched(struct amdgpu_cs_parser *p,
- struct amdgpu_job *job)
-{
- struct drm_gpu_scheduler **scheds;
-
- /* The create msg must be in the first IB submitted */
- if (atomic_read(&job->base.entity->fence_seq))
- return -EINVAL;
-
- /* if VCN0 is harvested, we can't support AV1 */
- if (p->adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0)
- return -EINVAL;
-
- scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_ENC]
- [AMDGPU_RING_PRIO_0].sched;
- drm_sched_entity_modify_sched(job->base.entity, scheds, 1);
- return 0;
-}
-
-static int vcn_v4_0_5_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job,
- uint64_t addr)
-{
- struct ttm_operation_ctx ctx = { false, false };
- struct amdgpu_bo_va_mapping *map;
- uint32_t *msg, num_buffers;
- struct amdgpu_bo *bo;
- uint64_t start, end;
- unsigned int i;
- void *ptr;
- int r;
-
- addr &= AMDGPU_GMC_HOLE_MASK;
- r = amdgpu_cs_find_mapping(p, addr, &bo, &map);
- if (r) {
- DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr);
- return r;
- }
-
- start = map->start * AMDGPU_GPU_PAGE_SIZE;
- end = (map->last + 1) * AMDGPU_GPU_PAGE_SIZE;
- if (addr & 0x7) {
- DRM_ERROR("VCN messages must be 8 byte aligned!\n");
- return -EINVAL;
- }
-
- bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
- amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
- r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
- if (r) {
- DRM_ERROR("Failed validating the VCN message BO (%d)!\n", r);
- return r;
- }
-
- r = amdgpu_bo_kmap(bo, &ptr);
- if (r) {
- DRM_ERROR("Failed mapping the VCN message (%d)!\n", r);
- return r;
- }
-
- msg = ptr + addr - start;
-
- /* Check length */
- if (msg[1] > end - addr) {
- r = -EINVAL;
- goto out;
- }
-
- if (msg[3] != RDECODE_MSG_CREATE)
- goto out;
-
- num_buffers = msg[2];
- for (i = 0, msg = &msg[6]; i < num_buffers; ++i, msg += 4) {
- uint32_t offset, size, *create;
-
- if (msg[0] != RDECODE_MESSAGE_CREATE)
- continue;
-
- offset = msg[1];
- size = msg[2];
-
- if (offset + size > end) {
- r = -EINVAL;
- goto out;
- }
-
- create = ptr + addr + offset - start;
-
- /* H264, HEVC and VP9 can run on any instance */
- if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11)
- continue;
-
- r = vcn_v4_0_5_limit_sched(p, job);
- if (r)
- goto out;
- }
-
-out:
- amdgpu_bo_kunmap(bo);
- return r;
-}
-
-#define RADEON_VCN_ENGINE_TYPE_ENCODE (0x00000002)
-#define RADEON_VCN_ENGINE_TYPE_DECODE (0x00000003)
-
-#define RADEON_VCN_ENGINE_INFO (0x30000001)
-#define RADEON_VCN_ENGINE_INFO_MAX_OFFSET 16
-
-#define RENCODE_ENCODE_STANDARD_AV1 2
-#define RENCODE_IB_PARAM_SESSION_INIT 0x00000003
-#define RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET 64
-
-/* return the offset in ib if id is found, -1 otherwise
- * to speed up the searching we only search upto max_offset
- */
-static int vcn_v4_0_5_enc_find_ib_param(struct amdgpu_ib *ib, uint32_t id, int max_offset)
-{
- int i;
-
- for (i = 0; i < ib->length_dw && i < max_offset && ib->ptr[i] >= 8; i += ib->ptr[i]/4) {
- if (ib->ptr[i + 1] == id)
- return i;
- }
- return -1;
-}
-
-static int vcn_v4_0_5_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
- struct amdgpu_job *job,
- struct amdgpu_ib *ib)
-{
- struct amdgpu_ring *ring = amdgpu_job_ring(job);
- struct amdgpu_vcn_decode_buffer *decode_buffer;
- uint64_t addr;
- uint32_t val;
- int idx;
-
- /* The first instance can decode anything */
- if (!ring->me)
- return 0;
-
- /* RADEON_VCN_ENGINE_INFO is at the top of ib block */
- idx = vcn_v4_0_5_enc_find_ib_param(ib, RADEON_VCN_ENGINE_INFO,
- RADEON_VCN_ENGINE_INFO_MAX_OFFSET);
- if (idx < 0) /* engine info is missing */
- return 0;
-
- val = amdgpu_ib_get_value(ib, idx + 2); /* RADEON_VCN_ENGINE_TYPE */
- if (val == RADEON_VCN_ENGINE_TYPE_DECODE) {
- decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[idx + 6];
-
- if (!(decode_buffer->valid_buf_flag & 0x1))
- return 0;
-
- addr = ((u64)decode_buffer->msg_buffer_address_hi) << 32 |
- decode_buffer->msg_buffer_address_lo;
- return vcn_v4_0_5_dec_msg(p, job, addr);
- } else if (val == RADEON_VCN_ENGINE_TYPE_ENCODE) {
- idx = vcn_v4_0_5_enc_find_ib_param(ib, RENCODE_IB_PARAM_SESSION_INIT,
- RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET);
- if (idx >= 0 && ib->ptr[idx + 2] == RENCODE_ENCODE_STANDARD_AV1)
- return vcn_v4_0_5_limit_sched(p, job);
- }
- return 0;
-}
-
static const struct amdgpu_ring_funcs vcn_v4_0_5_unified_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_ENC,
.align_mask = 0x3f,
@@ -1566,7 +1402,6 @@ static const struct amdgpu_ring_funcs vcn_v4_0_5_unified_ring_vm_funcs = {
.get_rptr = vcn_v4_0_5_unified_ring_get_rptr,
.get_wptr = vcn_v4_0_5_unified_ring_get_wptr,
.set_wptr = vcn_v4_0_5_unified_ring_set_wptr,
- .patch_cs_in_place = vcn_v4_0_5_ring_patch_cs_in_place,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 71b465f8d83e..648f40091aa3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -3540,6 +3540,30 @@ int debug_refresh_runlist(struct device_queue_manager *dqm)
return debug_map_and_unlock(dqm);
}
+bool kfd_dqm_is_queue_in_process(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ int doorbell_off, u32 *queue_format)
+{
+ struct queue *q;
+ bool r = false;
+
+ if (!queue_format)
+ return r;
+
+ dqm_lock(dqm);
+
+ list_for_each_entry(q, &qpd->queues_list, list) {
+ if (q->properties.doorbell_off == doorbell_off) {
+ *queue_format = q->properties.format;
+ r = true;
+ goto out;
+ }
+ }
+
+out:
+ dqm_unlock(dqm);
+ return r;
+}
#if defined(CONFIG_DEBUG_FS)
static void seq_reg_dump(struct seq_file *m,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 08b40826ad1e..09ab36f8e8c6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -324,6 +324,9 @@ void set_queue_snapshot_entry(struct queue *q,
int debug_lock_and_unmap(struct device_queue_manager *dqm);
int debug_map_and_unlock(struct device_queue_manager *dqm);
int debug_refresh_runlist(struct device_queue_manager *dqm);
+bool kfd_dqm_is_queue_in_process(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ int doorbell_off, u32 *queue_format);
static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)
{
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
index bb8cbfc39b90..37b69fe0ede3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
@@ -306,23 +306,8 @@ static void event_interrupt_wq_v10(struct kfd_node *dev,
client_id == SOC15_IH_CLIENTID_UTCL2) {
struct kfd_vm_fault_info info = {0};
uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry);
- uint32_t node_id = SOC15_NODEID_FROM_IH_ENTRY(ih_ring_entry);
- uint32_t vmid_type = SOC15_VMID_TYPE_FROM_IH_ENTRY(ih_ring_entry);
- int hub_inst = 0;
struct kfd_hsa_memory_exception_data exception_data;
- /* gfxhub */
- if (!vmid_type && dev->adev->gfx.funcs->ih_node_to_logical_xcc) {
- hub_inst = dev->adev->gfx.funcs->ih_node_to_logical_xcc(dev->adev,
- node_id);
- if (hub_inst < 0)
- hub_inst = 0;
- }
-
- /* mmhub */
- if (vmid_type && client_id == SOC15_IH_CLIENTID_VMC)
- hub_inst = node_id / 4;
-
info.vmid = vmid;
info.mc_id = client_id;
info.page_addr = ih_ring_entry[4] |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c
index d163d92a692f..2b72d5b4949b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c
@@ -341,6 +341,10 @@ static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
m->sdmax_rlcx_doorbell_offset =
q->doorbell_off << SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;
+ m->sdmax_rlcx_sched_cntl = (amdgpu_sdma_phase_quantum
+ << SDMA0_QUEUE0_SCHEDULE_CNTL__CONTEXT_QUANTUM__SHIFT)
+ & SDMA0_QUEUE0_SCHEDULE_CNTL__CONTEXT_QUANTUM_MASK;
+
m->sdma_engine_id = q->sdma_engine_id;
m->sdma_queue_id = q->sdma_queue_id;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index a902950cc060..d07acf1b2f93 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -270,6 +270,11 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer)
struct kfd_node *dev = NULL;
struct kfd_process *proc = NULL;
struct kfd_process_device *pdd = NULL;
+ int i;
+ struct kfd_cu_occupancy cu_occupancy[AMDGPU_MAX_QUEUES];
+ u32 queue_format;
+
+ memset(cu_occupancy, 0x0, sizeof(cu_occupancy));
pdd = container_of(attr, struct kfd_process_device, attr_cu_occupancy);
dev = pdd->dev;
@@ -287,8 +292,29 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer)
/* Collect wave count from device if it supports */
wave_cnt = 0;
max_waves_per_cu = 0;
- dev->kfd2kgd->get_cu_occupancy(dev->adev, proc->pasid, &wave_cnt,
- &max_waves_per_cu, 0);
+
+ /*
+ * For GFX 9.4.3, fetch the CU occupancy from the first XCC in the partition.
+ * For AQL queues, because of cooperative dispatch we multiply the wave count
+ * by number of XCCs in the partition to get the total wave counts across all
+ * XCCs in the partition.
+ * For PM4 queues, there is no cooperative dispatch so wave_cnt stay as it is.
+ */
+ dev->kfd2kgd->get_cu_occupancy(dev->adev, cu_occupancy,
+ &max_waves_per_cu, ffs(dev->xcc_mask) - 1);
+
+ for (i = 0; i < AMDGPU_MAX_QUEUES; i++) {
+ if (cu_occupancy[i].wave_cnt != 0 &&
+ kfd_dqm_is_queue_in_process(dev->dqm, &pdd->qpd,
+ cu_occupancy[i].doorbell_off,
+ &queue_format)) {
+ if (unlikely(queue_format == KFD_QUEUE_FORMAT_PM4))
+ wave_cnt += cu_occupancy[i].wave_cnt;
+ else
+ wave_cnt += (NUM_XCC(dev->xcc_mask) *
+ cu_occupancy[i].wave_cnt);
+ }
+ }
/* Translate wave count to number of compute units */
cu_cnt = (wave_cnt + (max_waves_per_cu - 1)) / max_waves_per_cu;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index b439d4d0bd84..01b960b15274 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -517,7 +517,6 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
if (retval)
goto err_destroy_queue;
- kfd_procfs_del_queue(pqn->q);
dqm = pqn->q->device->dqm;
retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
if (retval) {
@@ -527,6 +526,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
if (retval != -ETIME)
goto err_destroy_queue;
}
+ kfd_procfs_del_queue(pqn->q);
kfd_queue_release_buffers(pdd, &pqn->q->properties);
pqm_clean_queue_resource(pqm, pqn);
uninit_queue(pqn->q);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 0cff66735cfe..6e79028c5d78 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -808,6 +808,20 @@ static void dmub_hpd_callback(struct amdgpu_device *adev,
}
/**
+ * dmub_hpd_sense_callback - DMUB HPD sense processing callback.
+ * @adev: amdgpu_device pointer
+ * @notify: dmub notification structure
+ *
+ * HPD sense changes can occur during low power states and need to be
+ * notified from firmware to driver.
+ */
+static void dmub_hpd_sense_callback(struct amdgpu_device *adev,
+ struct dmub_notification *notify)
+{
+ DRM_DEBUG_DRIVER("DMUB HPD SENSE callback.\n");
+}
+
+/**
* register_dmub_notify_callback - Sets callback for DMUB notify
* @adev: amdgpu_device pointer
* @type: Type of dmub notification
@@ -1757,25 +1771,41 @@ static struct dml2_soc_bb *dm_dmub_get_vbios_bounding_box(struct amdgpu_device *
static enum dmub_ips_disable_type dm_get_default_ips_mode(
struct amdgpu_device *adev)
{
- /*
- * On DCN35 systems with Z8 enabled, it's possible for IPS2 + Z8 to
- * cause a hard hang. A fix exists for newer PMFW.
- *
- * As a workaround, for non-fixed PMFW, force IPS1+RCG as the deepest
- * IPS state in all cases, except for s0ix and all displays off (DPMS),
- * where IPS2 is allowed.
- *
- * When checking pmfw version, use the major and minor only.
- */
- if (amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(3, 5, 0) &&
- (adev->pm.fw_version & 0x00FFFF00) < 0x005D6300)
- return DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF;
+ enum dmub_ips_disable_type ret = DMUB_IPS_ENABLE;
- if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 5, 0))
- return DMUB_IPS_ENABLE;
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
+ case IP_VERSION(3, 5, 0):
+ /*
+ * On DCN35 systems with Z8 enabled, it's possible for IPS2 + Z8 to
+ * cause a hard hang. A fix exists for newer PMFW.
+ *
+ * As a workaround, for non-fixed PMFW, force IPS1+RCG as the deepest
+ * IPS state in all cases, except for s0ix and all displays off (DPMS),
+ * where IPS2 is allowed.
+ *
+ * When checking pmfw version, use the major and minor only.
+ */
+ if ((adev->pm.fw_version & 0x00FFFF00) < 0x005D6300)
+ ret = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF;
+ else if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(11, 5, 0))
+ /*
+ * Other ASICs with DCN35 that have residency issues with
+ * IPS2 in idle.
+ * We want them to use IPS2 only in display off cases.
+ */
+ ret = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF;
+ break;
+ case IP_VERSION(3, 5, 1):
+ ret = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF;
+ break;
+ default:
+ /* ASICs older than DCN35 do not have IPSs */
+ if (amdgpu_ip_version(adev, DCE_HWIP, 0) < IP_VERSION(3, 5, 0))
+ ret = DMUB_IPS_DISABLE_ALL;
+ break;
+ }
- /* ASICs older than DCN35 do not have IPSs */
- return DMUB_IPS_DISABLE_ALL;
+ return ret;
}
static int amdgpu_dm_init(struct amdgpu_device *adev)
@@ -3808,6 +3838,12 @@ static int register_hpd_handlers(struct amdgpu_device *adev)
DRM_ERROR("amdgpu: fail to register dmub hpd callback");
return -EINVAL;
}
+
+ if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD_SENSE_NOTIFY,
+ dmub_hpd_sense_callback, true)) {
+ DRM_ERROR("amdgpu: fail to register dmub hpd sense callback");
+ return -EINVAL;
+ }
}
list_for_each_entry(connector,
@@ -4449,6 +4485,7 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev)
#define AMDGPU_DM_DEFAULT_MIN_BACKLIGHT 12
#define AMDGPU_DM_DEFAULT_MAX_BACKLIGHT 255
+#define AMDGPU_DM_MIN_SPREAD ((AMDGPU_DM_DEFAULT_MAX_BACKLIGHT - AMDGPU_DM_DEFAULT_MIN_BACKLIGHT) / 2)
#define AUX_BL_DEFAULT_TRANSITION_TIME_MS 50
static void amdgpu_dm_update_backlight_caps(struct amdgpu_display_manager *dm,
@@ -4463,6 +4500,21 @@ static void amdgpu_dm_update_backlight_caps(struct amdgpu_display_manager *dm,
return;
amdgpu_acpi_get_backlight_caps(&caps);
+
+ /* validate the firmware value is sane */
+ if (caps.caps_valid) {
+ int spread = caps.max_input_signal - caps.min_input_signal;
+
+ if (caps.max_input_signal > AMDGPU_DM_DEFAULT_MAX_BACKLIGHT ||
+ caps.min_input_signal < 0 ||
+ spread > AMDGPU_DM_DEFAULT_MAX_BACKLIGHT ||
+ spread < AMDGPU_DM_MIN_SPREAD) {
+ DRM_DEBUG_KMS("DM: Invalid backlight caps: min=%d, max=%d\n",
+ caps.min_input_signal, caps.max_input_signal);
+ caps.caps_valid = false;
+ }
+ }
+
if (caps.caps_valid) {
dm->backlight_caps[bl_idx].caps_valid = true;
if (caps.aux_support)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 2d7755e2b6c3..15d4690c74d6 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -50,7 +50,7 @@
#define AMDGPU_DM_MAX_NUM_EDP 2
-#define AMDGPU_DMUB_NOTIFICATION_MAX 6
+#define AMDGPU_DMUB_NOTIFICATION_MAX 7
#define HDMI_AMD_VENDOR_SPECIFIC_DATA_BLOCK_IEEE_REGISTRATION_ID 0x00001A
#define AMD_VSDB_VERSION_3_FEATURECAP_REPLAYMODE 0x40
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index c0c61c03984c..83a31b97e96b 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -1147,7 +1147,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state,
params[count].num_slices_v = aconnector->dsc_settings.dsc_num_slices_v;
params[count].bpp_overwrite = aconnector->dsc_settings.dsc_bits_per_pixel;
params[count].compression_possible = stream->sink->dsc_caps.dsc_dec_caps.is_dsc_supported;
- dc_dsc_get_policy_for_timing(params[count].timing, 0, &dsc_policy);
+ dc_dsc_get_policy_for_timing(params[count].timing, 0, &dsc_policy, dc_link_get_highest_encoding_format(stream->link));
if (!dc_dsc_compute_bandwidth_range(
stream->sink->ctx->dc->res_pool->dscs[0],
stream->sink->ctx->dc->debug.dsc_min_slice_height_override,
@@ -1681,7 +1681,7 @@ static bool is_dsc_common_config_possible(struct dc_stream_state *stream,
{
struct dc_dsc_policy dsc_policy = {0};
- dc_dsc_get_policy_for_timing(&stream->timing, 0, &dsc_policy);
+ dc_dsc_get_policy_for_timing(&stream->timing, 0, &dsc_policy, dc_link_get_highest_encoding_format(stream->link));
dc_dsc_compute_bandwidth_range(stream->sink->ctx->dc->res_pool->dscs[0],
stream->sink->ctx->dc->debug.dsc_min_slice_height_override,
dsc_policy.min_target_bpp * 16,
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
index 25f63b2e7a8e..495e3cd70426 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
@@ -961,6 +961,7 @@ static int amdgpu_dm_plane_helper_prepare_fb(struct drm_plane *plane,
else
domain = AMDGPU_GEM_DOMAIN_VRAM;
+ rbo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
r = amdgpu_bo_pin(rbo, domain);
if (unlikely(r != 0)) {
if (r != -ERESTARTSYS)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c
index 08c494a7a21b..0d5fefb0f591 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c
@@ -114,6 +114,7 @@ static int amdgpu_dm_wb_prepare_job(struct drm_writeback_connector *wb_connector
domain = amdgpu_display_supported_domains(adev, rbo->flags);
+ rbo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
r = amdgpu_bo_pin(rbo, domain);
if (unlikely(r != 0)) {
if (r != -ERESTARTSYS)
diff --git a/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c b/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c
index e47e9db062f4..681799468487 100644
--- a/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c
+++ b/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c
@@ -569,7 +569,7 @@ static void calculate_bandwidth(
break;
}
data->lb_partitions[i] = bw_floor2(bw_div(data->lb_size_per_component[i], data->lb_line_pitch), bw_int_to_fixed(1));
- /*clamp the partitions to the maxium number supported by the lb*/
+ /* clamp the partitions to the maximum number supported by the lb */
if ((surface_type[i] != bw_def_graphics || dceip->graphics_lb_nodownscaling_multi_line_prefetching == 1)) {
data->lb_partitions_max[i] = bw_int_to_fixed(10);
}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
index f770828df149..0e243f4344d0 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
@@ -59,6 +59,7 @@ int clk_mgr_helper_get_active_display_cnt(
display_count = 0;
for (i = 0; i < context->stream_count; i++) {
const struct dc_stream_state *stream = context->streams[i];
+ const struct dc_stream_status *stream_status = &context->stream_status[i];
/* Don't count SubVP phantom pipes as part of active
* display count
@@ -66,13 +67,7 @@ int clk_mgr_helper_get_active_display_cnt(
if (dc_state_get_stream_subvp_type(context, stream) == SUBVP_PHANTOM)
continue;
- /*
- * Only notify active stream or virtual stream.
- * Need to notify virtual stream to work around
- * headless case. HPD does not fire when system is in
- * S0i2.
- */
- if (!stream->dpms_off || stream->signal == SIGNAL_TYPE_VIRTUAL)
+ if (!stream->dpms_off || (stream_status && stream_status->plane_count))
display_count++;
}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
index 97164b5585a8..b46a3afe48ca 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
@@ -1222,6 +1222,12 @@ void dcn35_clk_mgr_construct(
ctx->dc->debug.disable_dpp_power_gate = false;
ctx->dc->debug.disable_hubp_power_gate = false;
ctx->dc->debug.disable_dsc_power_gate = false;
+
+ /* Disable dynamic IPS2 in older PMFW (93.12) for Z8 interop. */
+ if (ctx->dc->config.disable_ips == DMUB_IPS_ENABLE &&
+ ctx->dce_version == DCN_VERSION_3_5 &&
+ ((clk_mgr->base.smu_ver & 0x00FFFFFF) <= 0x005d0c00))
+ ctx->dc->config.disable_ips = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF;
} else {
/*let's reset the config control flag*/
ctx->dc->config.disable_ips = DMUB_IPS_DISABLE_ALL; /*pmfw not support it, disable it all*/
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index ae788154896c..5c39390ecbd5 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -1767,7 +1767,7 @@ bool dc_validate_boot_timing(const struct dc *dc,
if (crtc_timing->pix_clk_100hz != pix_clk_100hz)
return false;
- if (!se->funcs->dp_get_pixel_format)
+ if (!se || !se->funcs->dp_get_pixel_format)
return false;
if (!se->funcs->dp_get_pixel_format(
@@ -2376,7 +2376,7 @@ static bool is_surface_in_context(
return false;
}
-static enum surface_update_type get_plane_info_update_type(const struct dc_surface_update *u)
+static enum surface_update_type get_plane_info_update_type(const struct dc *dc, const struct dc_surface_update *u)
{
union surface_update_flags *update_flags = &u->surface->update_flags;
enum surface_update_type update_type = UPDATE_TYPE_FAST;
@@ -2455,7 +2455,7 @@ static enum surface_update_type get_plane_info_update_type(const struct dc_surfa
/* todo: below are HW dependent, we should add a hook to
* DCE/N resource and validated there.
*/
- if (u->plane_info->tiling_info.gfx9.swizzle != DC_SW_LINEAR) {
+ if (!dc->debug.skip_full_updated_if_possible) {
/* swizzled mode requires RQ to be setup properly,
* thus need to run DML to calculate RQ settings
*/
@@ -2547,7 +2547,7 @@ static enum surface_update_type det_surface_update(const struct dc *dc,
update_flags->raw = 0; // Reset all flags
- type = get_plane_info_update_type(u);
+ type = get_plane_info_update_type(dc, u);
elevate_update_type(&overall_type, type);
type = get_scaling_info_update_type(dc, u);
@@ -2596,6 +2596,12 @@ static enum surface_update_type det_surface_update(const struct dc *dc,
elevate_update_type(&overall_type, UPDATE_TYPE_MED);
}
+ if (u->sdr_white_level_nits)
+ if (u->sdr_white_level_nits != u->surface->sdr_white_level_nits) {
+ update_flags->bits.sdr_white_level_nits = 1;
+ elevate_update_type(&overall_type, UPDATE_TYPE_FULL);
+ }
+
if (u->cm2_params) {
if ((u->cm2_params->component_settings.shaper_3dlut_setting
!= u->surface->mcm_shaper_3dlut_setting)
@@ -2876,6 +2882,10 @@ static void copy_surface_update_to_plane(
surface->hdr_mult =
srf_update->hdr_mult;
+ if (srf_update->sdr_white_level_nits)
+ surface->sdr_white_level_nits =
+ srf_update->sdr_white_level_nits;
+
if (srf_update->blend_tf)
memcpy(&surface->blend_tf, srf_update->blend_tf,
sizeof(surface->blend_tf));
@@ -4679,6 +4689,8 @@ static bool full_update_required(struct dc *dc,
srf_updates[i].scaling_info ||
(srf_updates[i].hdr_mult.value &&
srf_updates[i].hdr_mult.value != srf_updates->surface->hdr_mult.value) ||
+ (srf_updates[i].sdr_white_level_nits &&
+ srf_updates[i].sdr_white_level_nits != srf_updates->surface->sdr_white_level_nits) ||
srf_updates[i].in_transfer_func ||
srf_updates[i].func_shaper ||
srf_updates[i].lut3d_func ||
@@ -5744,6 +5756,27 @@ enum dc_status dc_process_dmub_set_mst_slots(const struct dc *dc,
}
/**
+ * dc_process_dmub_dpia_set_tps_notification - Submits tps notification
+ *
+ * @dc: [in] dc structure
+ * @link_index: [in] link index
+ * @tps: [in] request tps
+ *
+ * Submits set_tps_notification command to dmub via inbox message
+ */
+void dc_process_dmub_dpia_set_tps_notification(const struct dc *dc, uint32_t link_index, uint8_t tps)
+{
+ union dmub_rb_cmd cmd = {0};
+
+ cmd.set_tps_notification.header.type = DMUB_CMD__DPIA;
+ cmd.set_tps_notification.header.sub_type = DMUB_CMD__DPIA_SET_TPS_NOTIFICATION;
+ cmd.set_tps_notification.tps_notification.instance = dc->links[link_index]->ddc_hw_inst;
+ cmd.set_tps_notification.tps_notification.tps = tps;
+
+ dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+}
+
+/**
* dc_process_dmub_dpia_hpd_int_enable - Submits DPIA DPD interruption
*
* @dc: [in] dc structure
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 4c94dd38be4b..3992ad73165b 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -55,7 +55,7 @@ struct aux_payload;
struct set_config_cmd_payload;
struct dmub_notification;
-#define DC_VER "3.2.299"
+#define DC_VER "3.2.301"
#define MAX_SURFACES 3
#define MAX_PLANES 6
@@ -462,6 +462,7 @@ struct dc_config {
bool support_edp0_on_dp1;
unsigned int enable_fpo_flicker_detection;
bool disable_hbr_audio_dp2;
+ bool consolidated_dpia_dp_lt;
};
enum visual_confirm {
@@ -762,7 +763,8 @@ union dpia_debug_options {
uint32_t disable_mst_dsc_work_around:1; /* bit 3 */
uint32_t enable_force_tbt3_work_around:1; /* bit 4 */
uint32_t disable_usb4_pm_support:1; /* bit 5 */
- uint32_t reserved:26;
+ uint32_t enable_consolidated_dpia_dp_lt:1; /* bit 6 */
+ uint32_t reserved:25;
} bits;
uint32_t raw;
};
@@ -1056,6 +1058,9 @@ struct dc_debug_options {
unsigned int force_lls;
bool notify_dpia_hr_bw;
bool enable_ips_visual_confirm;
+ unsigned int sharpen_policy;
+ unsigned int scale_to_sharpness_policy;
+ bool skip_full_updated_if_possible;
};
@@ -1269,6 +1274,7 @@ union surface_update_flags {
uint32_t tmz_changed:1;
uint32_t mcm_transfer_function_enable_change:1; /* disable or enable MCM transfer func */
uint32_t full_update:1;
+ uint32_t sdr_white_level_nits:1;
} bits;
uint32_t raw;
@@ -1351,6 +1357,7 @@ struct dc_plane_state {
bool adaptive_sharpness_en;
int sharpness_level;
enum linear_light_scaling linear_light_scaling;
+ unsigned int sdr_white_level_nits;
};
struct dc_plane_info {
@@ -1508,6 +1515,7 @@ struct dc_surface_update {
*/
struct dc_cm2_parameters *cm2_params;
const struct dc_csc_transform *cursor_csc_color_matrix;
+ unsigned int sdr_white_level_nits;
};
/*
@@ -2520,6 +2528,8 @@ enum dc_status dc_process_dmub_set_mst_slots(const struct dc *dc,
uint8_t mst_alloc_slots,
uint8_t *mst_slots_in_use);
+void dc_process_dmub_dpia_set_tps_notification(const struct dc *dc, uint32_t link_index, uint8_t tps);
+
void dc_process_dmub_dpia_hpd_int_enable(const struct dc *dc,
uint32_t hpd_int_enable);
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
index 519c3df78ee5..41bd95e9177a 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
@@ -969,6 +969,14 @@ union dp_sink_video_fallback_formats {
uint8_t raw;
};
+union dpcd_max_uncompressed_pixel_rate_cap {
+ struct {
+ uint16_t max_uncompressed_pixel_rate_cap :15;
+ uint16_t valid :1;
+ } bits;
+ uint8_t raw[2];
+};
+
union dp_fec_capability1 {
struct {
uint8_t AGGREGATED_ERROR_COUNTERS_CAPABLE :1;
@@ -1170,6 +1178,7 @@ struct dpcd_caps {
struct dc_lttpr_caps lttpr_caps;
struct adaptive_sync_caps adaptive_sync_caps;
struct dpcd_usb4_dp_tunneling_info usb4_dp_tun_info;
+ union dpcd_max_uncompressed_pixel_rate_cap max_uncompressed_pixel_rate_cap;
union dp_128b_132b_supported_link_rates dp_128b_132b_supported_link_rates;
union dp_main_line_channel_coding_cap channel_coding_cap;
@@ -1340,6 +1349,9 @@ struct dp_trace {
#ifndef DP_CABLE_ATTRIBUTES_UPDATED_BY_DPTX
#define DP_CABLE_ATTRIBUTES_UPDATED_BY_DPTX 0x110
#endif
+#ifndef DPCD_MAX_UNCOMPRESSED_PIXEL_RATE_CAP
+#define DPCD_MAX_UNCOMPRESSED_PIXEL_RATE_CAP 0x221c
+#endif
#ifndef DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE
#define DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE 0x50
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dsc.h b/drivers/gpu/drm/amd/display/dc/dc_dsc.h
index fe3078b8789e..9014c2409817 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dsc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_dsc.h
@@ -59,6 +59,7 @@ struct dc_dsc_config_options {
uint32_t max_target_bpp_limit_override_x16;
uint32_t slice_height_granularity;
uint32_t dsc_force_odm_hslice_override;
+ bool force_dsc_when_not_needed;
};
bool dc_dsc_parse_dsc_dpcd(const struct dc *dc,
@@ -100,7 +101,8 @@ uint32_t dc_dsc_stream_bandwidth_overhead_in_kbps(
*/
void dc_dsc_get_policy_for_timing(const struct dc_crtc_timing *timing,
uint32_t max_target_bpp_limit_override_x16,
- struct dc_dsc_policy *policy);
+ struct dc_dsc_policy *policy,
+ const enum dc_link_encoding_format link_encoding);
void dc_dsc_policy_set_max_target_bpp_limit(uint32_t limit);
diff --git a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c
index cd6de93eb91c..603552dbd771 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c
@@ -186,19 +186,17 @@ void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl
spl_in->h_active = pipe_ctx->plane_res.scl_data.h_active;
spl_in->v_active = pipe_ctx->plane_res.scl_data.v_active;
+
+ spl_in->debug.sharpen_policy = (enum sharpen_policy)pipe_ctx->stream->ctx->dc->debug.sharpen_policy;
+ spl_in->debug.scale_to_sharpness_policy =
+ (enum scale_to_sharpness_policy)pipe_ctx->stream->ctx->dc->debug.scale_to_sharpness_policy;
+
/* Check if it is stream is in fullscreen and if its HDR.
* Use this to determine sharpness levels
*/
spl_in->is_fullscreen = dm_helpers_is_fullscreen(pipe_ctx->stream->ctx, pipe_ctx->stream);
spl_in->is_hdr_on = dm_helpers_is_hdr_on(pipe_ctx->stream->ctx, pipe_ctx->stream);
- spl_in->hdr_multx100 = 0;
- if (spl_in->is_hdr_on) {
- spl_in->hdr_multx100 = (uint32_t)dc_fixpt_floor(dc_fixpt_mul(plane_state->hdr_mult,
- dc_fixpt_from_int(100)));
- /* Disable sharpness for HDR Mult > 6.0 */
- if (spl_in->hdr_multx100 > 600)
- spl_in->adaptive_sharpness.enable = false;
- }
+ spl_in->sdr_white_level_nits = plane_state->sdr_white_level_nits;
}
/// @brief Translate SPL output parameters to pipe context
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c
index e7019c95ba79..4fce64a030b6 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c
@@ -313,9 +313,6 @@ static void handle_det_buf_split(struct display_mode_lib *mode_lib,
if (swath_height_c > 0)
log2_swath_height_c = dml_log2(swath_height_c);
-
- if (req128_c && log2_swath_height_c > 0)
- log2_swath_height_c -= 1;
}
rq_param->dlg.rq_l.swath_height = 1 << log2_swath_height_l;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c
index ae5251041728..3fa9a5da02f6 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c
@@ -313,9 +313,6 @@ static void handle_det_buf_split(struct display_mode_lib *mode_lib,
if (swath_height_c > 0)
log2_swath_height_c = dml_log2(swath_height_c);
-
- if (req128_c && log2_swath_height_c > 0)
- log2_swath_height_c -= 1;
}
rq_param->dlg.rq_l.swath_height = 1 << log2_swath_height_l;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
index 0b132ce1d2cd..2b275e680379 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
@@ -1924,15 +1924,6 @@ static unsigned int CalculateVMAndRowBytes(
*PixelPTEReqWidth = 32768.0 / BytePerPixel;
*PTERequestSize = 64;
FractionOfPTEReturnDrop = 0;
- } else if (MacroTileSizeBytes == 4096) {
- PixelPTEReqHeightPTEs = 1;
- *PixelPTEReqHeight = MacroTileHeight;
- *PixelPTEReqWidth = 8 * *MacroTileWidth;
- *PTERequestSize = 64;
- if (ScanDirection != dm_vert)
- FractionOfPTEReturnDrop = 0;
- else
- FractionOfPTEReturnDrop = 7.0 / 8;
} else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
PixelPTEReqHeightPTEs = 16;
*PixelPTEReqHeight = 16 * BlockHeight256Bytes;
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
index 547dfcc80fde..d851c081e376 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
@@ -8926,7 +8926,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc
// The prefetch scheduling should only be calculated once as per AllowForPStateChangeOrStutterInVBlank requirement
// If the AllowForPStateChangeOrStutterInVBlank requirement is not strict (i.e. only try those power saving feature
- // if possible, then will try to program for the best power saving features in order of diffculty (dram, fclk, stutter)
+ // if possible, then will try to program for the best power saving features in order of difficulty (dram, fclk, stutter)
s->iteration = 0;
s->MaxTotalRDBandwidth = 0;
s->AllPrefetchModeTested = false;
@@ -9977,7 +9977,7 @@ void dml_core_get_row_heights(
dml_print("DML_DLG: %s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes);
#endif
- // just suppluy with enough parameters to calculate meta and dte
+ // just supply with enough parameters to calculate meta and dte
CalculateVMAndRowBytes(
0, // dml_bool_t ViewportStationary,
1, // dml_bool_t DCCEnable,
@@ -10110,7 +10110,7 @@ dml_bool_t dml_mode_support(
/// Note: In this function, it is assumed that DCFCLK, SOCCLK freq are the state values, and mode_program will just use the DML calculated DPPCLK and DISPCLK
/// @param mode_lib mode_lib data struct that house all the input/output/bbox and calculation values.
/// @param state_idx Power state idx chosen
-/// @param display_cfg Display Congiuration
+/// @param display_cfg Display Configuration
/// @param call_standalone Calling mode_programming without calling mode support. Some of the "support" struct member will be pre-calculated before doing mode programming
/// TODO: Add clk_cfg input, could be useful for standalone mode
dml_bool_t dml_mode_programming(
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
index b0d9aed0f265..8697eac1e1f7 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
@@ -858,7 +858,9 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm
plane->immediate_flip = plane_state->flip_immediate;
- plane->composition.rect_out_height_spans_vactive = plane_state->dst_rect.height >= stream->timing.v_addressable;
+ plane->composition.rect_out_height_spans_vactive =
+ plane_state->dst_rect.height >= stream->timing.v_addressable &&
+ stream->dst.height >= stream->timing.v_addressable;
}
//TODO : Could be possibly moved to a common helper layer.
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c
index d63558ee3135..1cf9015e854a 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c
@@ -940,9 +940,11 @@ static void build_synchronized_timing_groups(
/* find synchronizable timing groups */
for (j = i + 1; j < display_config->display_config.num_streams; j++) {
if (memcmp(master_timing,
- &display_config->display_config.stream_descriptors[j].timing,
- sizeof(struct dml2_timing_cfg)) == 0 &&
- display_config->display_config.stream_descriptors[i].output.output_encoder == display_config->display_config.stream_descriptors[j].output.output_encoder) {
+ &display_config->display_config.stream_descriptors[j].timing,
+ sizeof(struct dml2_timing_cfg)) == 0 &&
+ display_config->display_config.stream_descriptors[i].output.output_encoder == display_config->display_config.stream_descriptors[j].output.output_encoder &&
+ (display_config->display_config.stream_descriptors[i].output.output_encoder != dml2_hdmi || //hdmi requires formats match
+ display_config->display_config.stream_descriptors[i].output.output_format == display_config->display_config.stream_descriptors[j].output.output_format)) {
set_bit_in_bitfield(&pmo->scratch.pmo_dcn4.synchronized_timing_group_masks[timing_group_idx], j);
set_bit_in_bitfield(&stream_mapped_mask, j);
}
diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c
index a1727e5bf024..ebd5df1a36e8 100644
--- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c
+++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c
@@ -668,6 +668,7 @@ static bool decide_dsc_bandwidth_range(
*/
static bool decide_dsc_target_bpp_x16(
const struct dc_dsc_policy *policy,
+ const struct dc_dsc_config_options *options,
const struct dsc_enc_caps *dsc_common_caps,
const int target_bandwidth_kbps,
const struct dc_crtc_timing *timing,
@@ -682,7 +683,7 @@ static bool decide_dsc_target_bpp_x16(
if (decide_dsc_bandwidth_range(policy->min_target_bpp * 16, policy->max_target_bpp * 16,
num_slices_h, dsc_common_caps, timing, link_encoding, &range)) {
if (target_bandwidth_kbps >= range.stream_kbps) {
- if (policy->enable_dsc_when_not_needed)
+ if (policy->enable_dsc_when_not_needed || options->force_dsc_when_not_needed)
/* enable max bpp even dsc is not needed */
*target_bpp_x16 = range.max_target_bpp_x16;
} else if (target_bandwidth_kbps >= range.max_kbps) {
@@ -882,7 +883,7 @@ static bool setup_dsc_config(
memset(dsc_cfg, 0, sizeof(struct dc_dsc_config));
- dc_dsc_get_policy_for_timing(timing, options->max_target_bpp_limit_override_x16, &policy);
+ dc_dsc_get_policy_for_timing(timing, options->max_target_bpp_limit_override_x16, &policy, link_encoding);
pic_width = timing->h_addressable + timing->h_border_left + timing->h_border_right;
pic_height = timing->v_addressable + timing->v_border_top + timing->v_border_bottom;
@@ -1080,6 +1081,7 @@ static bool setup_dsc_config(
if (target_bandwidth_kbps > 0) {
is_dsc_possible = decide_dsc_target_bpp_x16(
&policy,
+ options,
&dsc_common_caps,
target_bandwidth_kbps,
timing,
@@ -1171,7 +1173,8 @@ uint32_t dc_dsc_stream_bandwidth_overhead_in_kbps(
void dc_dsc_get_policy_for_timing(const struct dc_crtc_timing *timing,
uint32_t max_target_bpp_limit_override_x16,
- struct dc_dsc_policy *policy)
+ struct dc_dsc_policy *policy,
+ const enum dc_link_encoding_format link_encoding)
{
uint32_t bpc = 0;
@@ -1235,10 +1238,7 @@ void dc_dsc_get_policy_for_timing(const struct dc_crtc_timing *timing,
policy->max_target_bpp = max_target_bpp_limit_override_x16 / 16;
/* enable DSC when not needed, default false */
- if (dsc_policy_enable_dsc_when_not_needed)
- policy->enable_dsc_when_not_needed = dsc_policy_enable_dsc_when_not_needed;
- else
- policy->enable_dsc_when_not_needed = false;
+ policy->enable_dsc_when_not_needed = dsc_policy_enable_dsc_when_not_needed;
}
void dc_dsc_policy_set_max_target_bpp_limit(uint32_t limit)
@@ -1267,4 +1267,5 @@ void dc_dsc_get_default_config_option(const struct dc *dc, struct dc_dsc_config_
options->dsc_force_odm_hslice_override = dc->debug.force_odm_combine;
options->max_target_bpp_limit_override_x16 = 0;
options->slice_height_granularity = 1;
+ options->force_dsc_when_not_needed = false;
}
diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c
index 6293173ba2b9..5eb3da8d5206 100644
--- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c
@@ -545,6 +545,7 @@ static void hubbub35_init(struct hubbub *hubbub)
DCHUBBUB_ARB_MAX_REQ_OUTSTAND, 256,
DCHUBBUB_ARB_MIN_REQ_OUTSTAND, 256);
+ memset(&hubbub2->watermarks.a.cstate_pstate, 0, sizeof(hubbub2->watermarks.a.cstate_pstate));
}
/*static void hubbub35_set_request_limit(struct hubbub *hubbub,
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
index d52ce58c6a98..4fbed0298adf 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
@@ -57,6 +57,7 @@
#include "panel_cntl.h"
#include "dc_state_priv.h"
#include "dpcd_defs.h"
+#include "dsc.h"
/* include DCE11 register header files */
#include "dce/dce_11_0_d.h"
#include "dce/dce_11_0_sh_mask.h"
@@ -1823,6 +1824,48 @@ static void get_edp_links_with_sink(
}
}
+static void clean_up_dsc_blocks(struct dc *dc)
+{
+ struct display_stream_compressor *dsc = NULL;
+ struct timing_generator *tg = NULL;
+ struct stream_encoder *se = NULL;
+ struct dccg *dccg = dc->res_pool->dccg;
+ struct pg_cntl *pg_cntl = dc->res_pool->pg_cntl;
+ int i;
+
+ if (dc->ctx->dce_version != DCN_VERSION_3_5 &&
+ dc->ctx->dce_version != DCN_VERSION_3_51)
+ return;
+
+ for (i = 0; i < dc->res_pool->res_cap->num_dsc; i++) {
+ struct dcn_dsc_state s = {0};
+
+ dsc = dc->res_pool->dscs[i];
+ dsc->funcs->dsc_read_state(dsc, &s);
+ if (s.dsc_fw_en) {
+ /* disable DSC in OPTC */
+ if (i < dc->res_pool->timing_generator_count) {
+ tg = dc->res_pool->timing_generators[i];
+ tg->funcs->set_dsc_config(tg, OPTC_DSC_DISABLED, 0, 0);
+ }
+ /* disable DSC in stream encoder */
+ if (i < dc->res_pool->stream_enc_count) {
+ se = dc->res_pool->stream_enc[i];
+ se->funcs->dp_set_dsc_config(se, OPTC_DSC_DISABLED, 0, 0);
+ se->funcs->dp_set_dsc_pps_info_packet(se, false, NULL, true);
+ }
+ /* disable DSC block */
+ if (dccg->funcs->set_ref_dscclk)
+ dccg->funcs->set_ref_dscclk(dccg, dsc->inst);
+ dsc->funcs->dsc_disable(dsc);
+
+ /* power down DSC */
+ if (pg_cntl != NULL)
+ pg_cntl->funcs->dsc_pg_control(pg_cntl, dsc->inst, false);
+ }
+ }
+}
+
/*
* When ASIC goes from VBIOS/VGA mode to driver/accelerated mode we need:
* 1. Power down all DC HW blocks
@@ -1927,6 +1970,13 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context)
clk_mgr_exit_optimized_pwr_state(dc, dc->clk_mgr);
power_down_all_hw_blocks(dc);
+
+ /* DSC could be enabled on eDP during VBIOS post.
+ * To clean up dsc blocks if eDP is in link but not active.
+ */
+ if (edp_link_with_sink && (edp_stream_num == 0))
+ clean_up_dsc_blocks(dc);
+
disable_vga_and_power_gate_all_controllers(dc);
if (edp_link_with_sink && !keep_edp_vdd_on)
dc->hwss.edp_power_control(edp_link_with_sink, false);
@@ -2046,13 +2096,20 @@ static void set_drr(struct pipe_ctx **pipe_ctx,
* as well.
*/
for (i = 0; i < num_pipes; i++) {
- pipe_ctx[i]->stream_res.tg->funcs->set_drr(
- pipe_ctx[i]->stream_res.tg, &params);
-
- if (adjust.v_total_max != 0 && adjust.v_total_min != 0)
- pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control(
- pipe_ctx[i]->stream_res.tg,
- event_triggers, num_frames);
+ /* dc_state_destruct() might null the stream resources, so fetch tg
+ * here first to avoid a race condition. The lifetime of the pointee
+ * itself (the timing_generator object) is not a problem here.
+ */
+ struct timing_generator *tg = pipe_ctx[i]->stream_res.tg;
+
+ if ((tg != NULL) && tg->funcs) {
+ if (tg->funcs->set_drr)
+ tg->funcs->set_drr(tg, &params);
+ if (adjust.v_total_max != 0 && adjust.v_total_min != 0)
+ if (tg->funcs->set_static_screen_control)
+ tg->funcs->set_static_screen_control(
+ tg, event_triggers, num_frames);
+ }
}
}
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c
index 42c52284a868..bded33575493 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c
@@ -455,7 +455,7 @@ bool dcn30_mmhubbub_warmup(
struct mcif_wb *mcif_wb;
struct mcif_warmup_params warmup_params = {0};
unsigned int i, i_buf;
- /*make sure there is no active DWB eanbled */
+ /* make sure there is no active DWB enabled */
for (i = 0; i < num_dwb; i++) {
dwb = dc->res_pool->dwbc[wb_info[i].dwb_pipe_inst];
if (dwb->dwb_is_efc_transition || dwb->dwb_is_drc) {
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
index a36e11606f90..2e8c9f738259 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
@@ -1032,6 +1032,20 @@ void dcn32_update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
struct dsc_config dsc_cfg;
struct dsc_optc_config dsc_optc_cfg = {0};
enum optc_dsc_mode optc_dsc_mode;
+ struct dcn_dsc_state dsc_state = {0};
+
+ if (!dsc) {
+ DC_LOG_DSC("DSC is NULL for tg instance %d:", pipe_ctx->stream_res.tg->inst);
+ return;
+ }
+
+ if (dsc->funcs->dsc_read_state) {
+ dsc->funcs->dsc_read_state(dsc, &dsc_state);
+ if (!dsc_state.dsc_fw_en) {
+ DC_LOG_DSC("DSC has been disabled for tg instance %d:", pipe_ctx->stream_res.tg->inst);
+ return;
+ }
+ }
/* Enable DSC hw block */
dsc_cfg.pic_width = (stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right) / opp_cnt;
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
index 479fd3e89e5a..bd309dbdf7b2 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
@@ -334,7 +334,20 @@ static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
struct dsc_config dsc_cfg;
struct dsc_optc_config dsc_optc_cfg = {0};
enum optc_dsc_mode optc_dsc_mode;
+ struct dcn_dsc_state dsc_state = {0};
+ if (!dsc) {
+ DC_LOG_DSC("DSC is NULL for tg instance %d:", pipe_ctx->stream_res.tg->inst);
+ return;
+ }
+
+ if (dsc->funcs->dsc_read_state) {
+ dsc->funcs->dsc_read_state(dsc, &dsc_state);
+ if (!dsc_state.dsc_fw_en) {
+ DC_LOG_DSC("DSC has been disabled for tg instance %d:", pipe_ctx->stream_res.tg->inst);
+ return;
+ }
+ }
/* Enable DSC hw block */
dsc_cfg.pic_width = (stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right) / opp_cnt;
dsc_cfg.pic_height = stream->timing.v_addressable + stream->timing.v_border_top + stream->timing.v_border_bottom;
diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.c b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.c
index 46fb3649bc86..6499807af72a 100644
--- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.c
+++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.c
@@ -50,8 +50,31 @@ static void update_dpia_stream_allocation_table(struct dc_link *link,
DC_LOG_MST("dpia : status[%d]: alloc_slots[%d]: used_slots[%d]\n",
status, mst_alloc_slots, prev_mst_slots_in_use);
- ASSERT(link_enc);
- link_enc->funcs->update_mst_stream_allocation_table(link_enc, table);
+ if (link_enc)
+ link_enc->funcs->update_mst_stream_allocation_table(link_enc, table);
+}
+
+static void set_dio_dpia_link_test_pattern(struct dc_link *link,
+ const struct link_resource *link_res,
+ struct encoder_set_dp_phy_pattern_param *tp_params)
+{
+ if (tp_params->dp_phy_pattern != DP_TEST_PATTERN_VIDEO_MODE)
+ return;
+
+ struct link_encoder *link_enc = link_enc_cfg_get_link_enc(link);
+
+ if (!link_enc)
+ return;
+
+ link_enc->funcs->dp_set_phy_pattern(link_enc, tp_params);
+ link->dc->link_srv->dp_trace_source_sequence(link, DPCD_SOURCE_SEQ_AFTER_SET_SOURCE_PATTERN);
+}
+
+static void set_dio_dpia_lane_settings(struct dc_link *link,
+ const struct link_resource *link_res,
+ const struct dc_link_settings *link_settings,
+ const struct dc_lane_settings lane_settings[LANE_COUNT_DP_MAX])
+{
}
static const struct link_hwss dpia_link_hwss = {
@@ -65,8 +88,8 @@ static const struct link_hwss dpia_link_hwss = {
.ext = {
.set_throttled_vcp_size = set_dio_throttled_vcp_size,
.enable_dp_link_output = enable_dio_dp_link_output,
- .set_dp_link_test_pattern = set_dio_dp_link_test_pattern,
- .set_dp_lane_settings = set_dio_dp_lane_settings,
+ .set_dp_link_test_pattern = set_dio_dpia_link_test_pattern,
+ .set_dp_lane_settings = set_dio_dpia_lane_settings,
.update_stream_allocation_table = update_dpia_stream_allocation_table,
},
};
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_validation.c b/drivers/gpu/drm/amd/display/dc/link/link_validation.c
index 1aed55b0ab6a..60f15a9ba7a5 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_validation.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_validation.c
@@ -287,6 +287,13 @@ static bool dp_validate_mode_timing(
req_bw = dc_bandwidth_in_kbps_from_timing(timing, dc_link_get_highest_encoding_format(link));
max_bw = dp_link_bandwidth_kbps(link, link_setting);
+ bool is_max_uncompressed_pixel_rate_exceeded = link->dpcd_caps.max_uncompressed_pixel_rate_cap.bits.valid &&
+ timing->pix_clk_100hz > link->dpcd_caps.max_uncompressed_pixel_rate_cap.bits.max_uncompressed_pixel_rate_cap * 10000;
+
+ if (is_max_uncompressed_pixel_rate_exceeded && !timing->flags.DSC) {
+ return false;
+ }
+
if (req_bw <= max_bw) {
/* remember the biggest mode here, during
* initial link training (to get
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c
index 34a618a7278b..d78c8ec4de79 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c
@@ -1942,6 +1942,11 @@ static bool retrieve_link_cap(struct dc_link *link)
DC_LOG_DP2("\tFEC aggregated error counters are supported");
}
+ core_link_read_dpcd(link,
+ DPCD_MAX_UNCOMPRESSED_PIXEL_RATE_CAP,
+ link->dpcd_caps.max_uncompressed_pixel_rate_cap.raw,
+ sizeof(link->dpcd_caps.max_uncompressed_pixel_rate_cap.raw));
+
retrieve_cable_id(link);
dpcd_write_cable_id_to_dprx(link);
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c
index 988999c44475..27b881f947e8 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c
@@ -515,6 +515,41 @@ bool dp_is_interlane_aligned(union lane_align_status_updated align_status)
return align_status.bits.INTERLANE_ALIGN_DONE == 1;
}
+bool dp_check_interlane_aligned(union lane_align_status_updated align_status,
+ struct dc_link *link,
+ uint8_t retries)
+{
+ /* Take into consideration corner case for DP 1.4a LL Compliance CTS as USB4
+ * has to share encoders unlike DP and USBC
+ */
+ return (dp_is_interlane_aligned(align_status) ||
+ (link->skip_fallback_on_link_loss && retries));
+}
+
+uint32_t dp_get_eq_aux_rd_interval(
+ const struct dc_link *link,
+ const struct link_training_settings *lt_settings,
+ uint32_t offset,
+ uint8_t retries)
+{
+ if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) {
+ if (offset == 0 && retries == 1 && lt_settings->lttpr_mode == LTTPR_MODE_NON_TRANSPARENT)
+ return max(lt_settings->eq_pattern_time, (uint32_t) DPIA_CLK_SYNC_DELAY);
+ else
+ return dpia_get_eq_aux_rd_interval(link, lt_settings, offset);
+ } else if (is_repeater(lt_settings, offset))
+ return dp_translate_training_aux_read_interval(
+ link->dpcd_caps.lttpr_caps.aux_rd_interval[offset - 1]);
+ else
+ return lt_settings->eq_pattern_time;
+}
+
+bool dp_check_dpcd_reqeust_status(const struct dc_link *link,
+ enum dc_status status)
+{
+ return (status != DC_OK && link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA);
+}
+
enum link_training_result dp_check_link_loss_status(
struct dc_link *link,
const struct link_training_settings *link_training_setting)
@@ -973,13 +1008,17 @@ void repeater_training_done(struct dc_link *link, uint32_t offset)
dpcd_pattern.v1_4.TRAINING_PATTERN_SET);
}
-static void dpcd_exit_training_mode(struct dc_link *link, enum dp_link_encoding encoding)
+static enum link_training_result dpcd_exit_training_mode(struct dc_link *link, enum dp_link_encoding encoding)
{
+ enum dc_status status;
uint8_t sink_status = 0;
uint8_t i;
/* clear training pattern set */
- dpcd_set_training_pattern(link, DP_TRAINING_PATTERN_VIDEOIDLE);
+ status = dpcd_set_training_pattern(link, DP_TRAINING_PATTERN_VIDEOIDLE);
+
+ if (dp_check_dpcd_reqeust_status(link, status))
+ return LINK_TRAINING_ABORT;
if (encoding == DP_128b_132b_ENCODING) {
/* poll for intra-hop disable */
@@ -990,6 +1029,8 @@ static void dpcd_exit_training_mode(struct dc_link *link, enum dp_link_encoding
fsleep(1000);
}
}
+
+ return LINK_TRAINING_SUCCESS;
}
enum dc_status dpcd_configure_channel_coding(struct dc_link *link,
@@ -1013,17 +1054,18 @@ enum dc_status dpcd_configure_channel_coding(struct dc_link *link,
return status;
}
-void dpcd_set_training_pattern(
+enum dc_status dpcd_set_training_pattern(
struct dc_link *link,
enum dc_dp_training_pattern training_pattern)
{
+ enum dc_status status;
union dpcd_training_pattern dpcd_pattern = {0};
dpcd_pattern.v1_4.TRAINING_PATTERN_SET =
dp_training_pattern_to_dpcd_training_pattern(
link, training_pattern);
- core_link_write_dpcd(
+ status = core_link_write_dpcd(
link,
DP_TRAINING_PATTERN_SET,
&dpcd_pattern.raw,
@@ -1033,6 +1075,8 @@ void dpcd_set_training_pattern(
__func__,
DP_TRAINING_PATTERN_SET,
dpcd_pattern.v1_4.TRAINING_PATTERN_SET);
+
+ return status;
}
enum dc_status dpcd_set_link_settings(
@@ -1185,6 +1229,13 @@ void dpcd_set_lt_pattern_and_lane_settings(
dpcd_lt_buffer[DP_TRAINING_PATTERN_SET - DP_TRAINING_PATTERN_SET]
= dpcd_pattern.raw;
+ if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA)
+ dpia_set_tps_notification(
+ link,
+ lt_settings,
+ dpcd_pattern.v1_4.TRAINING_PATTERN_SET,
+ offset);
+
if (is_repeater(lt_settings, offset)) {
DC_LOG_HW_LINK_TRAINING("%s\n LTTPR Repeater ID: %d\n 0x%X pattern = %x\n",
__func__,
@@ -1455,7 +1506,8 @@ static enum link_training_result dp_transition_to_video_idle(
*/
if (link->connector_signal != SIGNAL_TYPE_EDP && status == LINK_TRAINING_SUCCESS) {
msleep(5);
- status = dp_check_link_loss_status(link, lt_settings);
+ if (!link->skip_fallback_on_link_loss)
+ status = dp_check_link_loss_status(link, lt_settings);
}
return status;
}
@@ -1521,7 +1573,9 @@ enum link_training_result dp_perform_link_training(
ASSERT(0);
/* exit training mode */
- dpcd_exit_training_mode(link, encoding);
+ if ((dpcd_exit_training_mode(link, encoding) != LINK_TRAINING_SUCCESS || status == LINK_TRAINING_ABORT) &&
+ link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA)
+ dpia_training_abort(link, &lt_settings, 0);
/* switch to video idle */
if ((status == LINK_TRAINING_SUCCESS) || !skip_video_pattern)
@@ -1599,8 +1653,7 @@ bool perform_link_training_with_retries(
dp_perform_link_training_skip_aux(link, &pipe_ctx->link_res, &cur_link_settings);
return true;
} else {
- /** @todo Consolidate USB4 DP and DPx.x training. */
- if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) {
+ if (!link->dc->config.consolidated_dpia_dp_lt && link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) {
status = dpia_perform_link_training(
link,
&pipe_ctx->link_res,
@@ -1629,8 +1682,17 @@ bool perform_link_training_with_retries(
dp_trace_lt_total_count_increment(link, false);
dp_trace_lt_result_update(link, status, false);
dp_trace_set_lt_end_timestamp(link, false);
- if (status == LINK_TRAINING_SUCCESS && !is_link_bw_low)
+ if (status == LINK_TRAINING_SUCCESS && !is_link_bw_low) {
+ // Update verified link settings to current one
+ // Because DPIA LT might fallback to lower link setting.
+ if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA &&
+ stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
+ link->verified_link_cap.link_rate = link->cur_link_settings.link_rate;
+ link->verified_link_cap.lane_count = link->cur_link_settings.lane_count;
+ dm_helpers_dp_mst_update_branch_bandwidth(link->ctx, link);
+ }
return true;
+ }
}
fail_count++;
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h
index 851bd17317a0..0b18aa35c33c 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h
@@ -55,7 +55,7 @@ void dp_set_hw_test_pattern(
uint8_t *custom_pattern,
uint32_t custom_pattern_size);
-void dpcd_set_training_pattern(
+enum dc_status dpcd_set_training_pattern(
struct dc_link *link,
enum dc_dp_training_pattern training_pattern);
@@ -182,4 +182,18 @@ uint32_t dp_translate_training_aux_read_interval(
uint8_t dp_get_nibble_at_index(const uint8_t *buf,
uint32_t index);
+
+bool dp_check_interlane_aligned(union lane_align_status_updated align_status,
+ struct dc_link *link,
+ uint8_t retries);
+
+uint32_t dp_get_eq_aux_rd_interval(
+ const struct dc_link *link,
+ const struct link_training_settings *lt_settings,
+ uint32_t offset,
+ uint8_t retries);
+
+bool dp_check_dpcd_reqeust_status(const struct dc_link *link,
+ enum dc_status status);
+
#endif /* __DC_LINK_DP_TRAINING_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c
index 2b4c15b0b407..3bdce32a85e3 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c
@@ -157,6 +157,7 @@ enum link_training_result perform_8b_10b_clock_recovery_sequence(
struct link_training_settings *lt_settings,
uint32_t offset)
{
+ enum dc_status status;
uint32_t retries_cr;
uint32_t retry_count;
uint32_t wait_time_microsec;
@@ -216,7 +217,7 @@ enum link_training_result perform_8b_10b_clock_recovery_sequence(
/* 4. Read lane status and requested drive
* settings as set by the sink
*/
- dp_get_lane_status_and_lane_adjust(
+ status = dp_get_lane_status_and_lane_adjust(
link,
lt_settings,
dpcd_lane_status,
@@ -224,6 +225,9 @@ enum link_training_result perform_8b_10b_clock_recovery_sequence(
dpcd_lane_adjust,
offset);
+ if (dp_check_dpcd_reqeust_status(link, status))
+ return LINK_TRAINING_ABORT;
+
/* 5. check CR done*/
if (dp_is_cr_done(lane_count, dpcd_lane_status)) {
DC_LOG_HW_LINK_TRAINING("%s: Clock recovery OK\n", __func__);
@@ -273,6 +277,7 @@ enum link_training_result perform_8b_10b_channel_equalization_sequence(
struct link_training_settings *lt_settings,
uint32_t offset)
{
+ enum dc_status status;
enum dc_dp_training_pattern tr_pattern;
uint32_t retries_ch_eq;
uint32_t wait_time_microsec;
@@ -308,12 +313,7 @@ enum link_training_result perform_8b_10b_channel_equalization_sequence(
dpcd_set_lane_settings(link, lt_settings, offset);
/* 3. wait for receiver to lock-on*/
- wait_time_microsec = lt_settings->eq_pattern_time;
-
- if (is_repeater(lt_settings, offset))
- wait_time_microsec =
- dp_translate_training_aux_read_interval(
- link->dpcd_caps.lttpr_caps.aux_rd_interval[offset - 1]);
+ wait_time_microsec = dp_get_eq_aux_rd_interval(link, lt_settings, offset, retries_ch_eq);
dp_wait_for_training_aux_rd_interval(
link,
@@ -322,7 +322,7 @@ enum link_training_result perform_8b_10b_channel_equalization_sequence(
/* 4. Read lane status and requested
* drive settings as set by the sink*/
- dp_get_lane_status_and_lane_adjust(
+ status = dp_get_lane_status_and_lane_adjust(
link,
lt_settings,
dpcd_lane_status,
@@ -330,6 +330,9 @@ enum link_training_result perform_8b_10b_channel_equalization_sequence(
dpcd_lane_adjust,
offset);
+ if (dp_check_dpcd_reqeust_status(link, status))
+ return LINK_TRAINING_ABORT;
+
/* 5. check CR done*/
if (!dp_is_cr_done(lane_count, dpcd_lane_status))
return dpcd_lane_status[0].bits.CR_DONE_0 ?
@@ -339,7 +342,7 @@ enum link_training_result perform_8b_10b_channel_equalization_sequence(
/* 6. check CHEQ done*/
if (dp_is_ch_eq_done(lane_count, dpcd_lane_status) &&
dp_is_symbol_locked(lane_count, dpcd_lane_status) &&
- dp_is_interlane_aligned(dpcd_lane_status_updated))
+ dp_check_interlane_aligned(dpcd_lane_status_updated, link, retries_ch_eq))
return LINK_TRAINING_SUCCESS;
/* 7. update VS/PE/PC2 in lt_settings*/
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c
index cd1975c03f38..39e4b7dc9588 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c
@@ -43,9 +43,6 @@
#define DC_LOGGER \
link->ctx->logger
-/* The approximate time (us) it takes to transmit 9 USB4 DP clock sync packets. */
-#define DPIA_CLK_SYNC_DELAY 16000
-
/* Extend interval between training status checks for manual testing. */
#define DPIA_DEBUG_EXTENDED_AUX_RD_INTERVAL_US 60000000
@@ -566,28 +563,6 @@ static enum link_training_result dpia_training_cr_phase(
return result;
}
-/* Return status read interval during equalization phase. */
-static uint32_t dpia_get_eq_aux_rd_interval(
- const struct dc_link *link,
- const struct link_training_settings *lt_settings,
- uint32_t hop)
-{
- uint32_t wait_time_microsec;
-
- if (hop == DPRX)
- wait_time_microsec = lt_settings->eq_pattern_time;
- else
- wait_time_microsec =
- dp_translate_training_aux_read_interval(
- link->dpcd_caps.lttpr_caps.aux_rd_interval[hop - 1]);
-
- /* Check debug option for extending aux read interval. */
- if (link->dc->debug.dpia_debug.bits.extend_aux_rd_interval)
- wait_time_microsec = DPIA_DEBUG_EXTENDED_AUX_RD_INTERVAL_US;
-
- return wait_time_microsec;
-}
-
/* Execute equalization phase of link training for specified hop in display
* path in non-transparent mode:
* - driver issues both DPCD and SET_CONFIG transactions.
@@ -936,6 +911,22 @@ static enum link_training_result dpia_training_end(
return result;
}
+/* Return status read interval during equalization phase. */
+uint32_t dpia_get_eq_aux_rd_interval(
+ const struct dc_link *link,
+ const struct link_training_settings *lt_settings,
+ uint32_t hop)
+{
+ /* Check debug option for extending aux read interval. */
+ if (link->dc->debug.dpia_debug.bits.extend_aux_rd_interval)
+ return DPIA_DEBUG_EXTENDED_AUX_RD_INTERVAL_US;
+ else if (hop == DPRX)
+ return lt_settings->eq_pattern_time;
+ else
+ return dp_translate_training_aux_read_interval(
+ link->dpcd_caps.lttpr_caps.aux_rd_interval[hop - 1]);
+}
+
/* When aborting training of specified hop in display path, clean up by:
* - Attempting to clear DPCD TRAINING_PATTERN_SET, LINK_BW_SET and LANE_COUNT_SET.
* - Sending SET_CONFIG(SET_LINK) with lane count and link rate set to 0.
@@ -943,7 +934,7 @@ static enum link_training_result dpia_training_end(
* @param link DPIA link being trained.
* @param hop Hop in display path. DPRX = 0.
*/
-static void dpia_training_abort(
+void dpia_training_abort(
struct dc_link *link,
struct link_training_settings *lt_settings,
uint32_t hop)
@@ -968,7 +959,26 @@ static void dpia_training_abort(
core_link_write_dpcd(link, dpcd_tps_offset, &data, 1);
core_link_write_dpcd(link, DP_LINK_BW_SET, &data, 1);
core_link_write_dpcd(link, DP_LANE_COUNT_SET, &data, 1);
- core_link_send_set_config(link, DPIA_SET_CFG_SET_LINK, data);
+
+ if (!link->dc->config.consolidated_dpia_dp_lt)
+ core_link_send_set_config(link, DPIA_SET_CFG_SET_LINK, data);
+}
+
+void dpia_set_tps_notification(
+ struct dc_link *link,
+ const struct link_training_settings *lt_settings,
+ uint8_t pattern,
+ uint32_t hop)
+{
+ uint8_t repeater_cnt = 0; /* Number of hops/repeaters in display path. */
+
+ if (lt_settings->lttpr_mode != LTTPR_MODE_NON_TRANSPARENT || pattern == DPCD_TRAINING_PATTERN_VIDEOIDLE)
+ return;
+
+ repeater_cnt = dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt);
+
+ if (hop != repeater_cnt)
+ dc_process_dmub_dpia_set_tps_notification(link->ctx->dc, link->link_index, pattern);
}
enum link_training_result dpia_perform_link_training(
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.h
index b39fb9faf1c2..9f4eceb494c2 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.h
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.h
@@ -28,6 +28,9 @@
#define __DC_LINK_DP_TRAINING_DPIA_H__
#include "link_dp_training.h"
+/* The approximate time (us) it takes to transmit 9 USB4 DP clock sync packets. */
+#define DPIA_CLK_SYNC_DELAY 16000
+
/* Train DP tunneling link for USB4 DPIA display endpoint.
* DPIA equivalent of dc_link_dp_perfrorm_link_training.
* Aborts link training upon detection of sink unplug.
@@ -38,4 +41,20 @@ enum link_training_result dpia_perform_link_training(
const struct dc_link_settings *link_setting,
bool skip_video_pattern);
+void dpia_training_abort(
+ struct dc_link *link,
+ struct link_training_settings *lt_settings,
+ uint32_t hop);
+
+uint32_t dpia_get_eq_aux_rd_interval(
+ const struct dc_link *link,
+ const struct link_training_settings *lt_settings,
+ uint32_t hop);
+
+void dpia_set_tps_notification(
+ struct dc_link *link,
+ const struct link_training_settings *lt_settings,
+ uint8_t pattern,
+ uint32_t offset);
+
#endif /* __DC_LINK_DP_TRAINING_DPIA_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
index 46ad684fe192..893a9d9ee870 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
@@ -2155,6 +2155,7 @@ static bool dcn35_resource_construct(
dc->dml2_options.max_segments_per_hubp = 24;
dc->dml2_options.det_segment_size = DCN3_2_DET_SEG_SIZE;/*todo*/
+ dc->dml2_options.override_det_buffer_size_kbytes = true;
if (dc->config.sdpif_request_limit_words_per_umc == 0)
dc->config.sdpif_request_limit_words_per_umc = 16;/*todo*/
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c
index 4c5e722baa3a..da9101b83e8c 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c
@@ -736,7 +736,7 @@ static const struct dc_debug_options debug_defaults_drv = {
.hdmichar = true,
.dpstream = true,
.symclk32_se = true,
- .symclk32_le = true,
+ .symclk32_le = false,
.symclk_fe = true,
.physymclk = false,
.dpiasymclk = true,
@@ -2133,6 +2133,7 @@ static bool dcn351_resource_construct(
dc->dml2_options.max_segments_per_hubp = 24;
dc->dml2_options.det_segment_size = DCN3_2_DET_SEG_SIZE;/*todo*/
+ dc->dml2_options.override_det_buffer_size_kbytes = true;
if (dc->config.sdpif_request_limit_words_per_umc == 0)
dc->config.sdpif_request_limit_words_per_umc = 16;/*todo*/
diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c
index 15f7eda903e6..014e8a296f0c 100644
--- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c
+++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c
@@ -813,6 +813,14 @@ static bool enable_easf(struct spl_in *spl_in, struct spl_scratch *spl_scratch)
return skip_easf;
}
+/* Check if video is in fullscreen mode */
+static bool spl_is_video_fullscreen(struct spl_in *spl_in)
+{
+ if (spl_is_yuv420(spl_in->basic_in.format) && spl_in->is_fullscreen)
+ return true;
+ return false;
+}
+
static bool spl_get_isharp_en(struct spl_in *spl_in,
struct spl_scratch *spl_scratch)
{
@@ -820,6 +828,7 @@ static bool spl_get_isharp_en(struct spl_in *spl_in,
int vratio = 0;
int hratio = 0;
struct spl_taps taps = spl_scratch->scl_data.taps;
+ bool fullscreen = spl_is_video_fullscreen(spl_in);
/* Return if adaptive sharpness is disabled */
if (spl_in->adaptive_sharpness.enable == false)
@@ -835,9 +844,18 @@ static bool spl_get_isharp_en(struct spl_in *spl_in,
// Scaling is up to 1:1 (no scaling) or upscaling
/*
- * Apply sharpness to all RGB surfaces and to
- * NV12/P010 surfaces
+ * Apply sharpness to RGB and YUV (NV12/P010)
+ * surfaces based on policy setting
*/
+ if (!spl_is_yuv420(spl_in->basic_in.format) &&
+ (spl_in->debug.sharpen_policy == SHARPEN_YUV))
+ return enable_isharp;
+ else if ((spl_is_yuv420(spl_in->basic_in.format) && !fullscreen) &&
+ (spl_in->debug.sharpen_policy == SHARPEN_RGB_FULLSCREEN_YUV))
+ return enable_isharp;
+ else if (!spl_in->is_fullscreen &&
+ spl_in->debug.sharpen_policy == SHARPEN_FULLSCREEN_ALL)
+ return enable_isharp;
/*
* Apply sharpness if supports horizontal taps 4,6 AND
@@ -1155,14 +1173,19 @@ static void spl_set_dscl_prog_data(struct spl_in *spl_in, struct spl_scratch *sp
}
/* Calculate C0-C3 coefficients based on HDR_mult */
-static void spl_calculate_c0_c3_hdr(struct dscl_prog_data *dscl_prog_data, uint32_t hdr_multx100)
+static void spl_calculate_c0_c3_hdr(struct dscl_prog_data *dscl_prog_data, uint32_t sdr_white_level_nits)
{
struct spl_fixed31_32 hdr_mult, c0_mult, c1_mult, c2_mult;
struct spl_fixed31_32 c0_calc, c1_calc, c2_calc;
struct spl_custom_float_format fmt;
+ uint32_t hdr_multx100_int;
- SPL_ASSERT(hdr_multx100);
- hdr_mult = spl_fixpt_from_fraction((long long)hdr_multx100, 100LL);
+ if ((sdr_white_level_nits >= 80) && (sdr_white_level_nits <= 480))
+ hdr_multx100_int = sdr_white_level_nits * 100 / 80;
+ else
+ hdr_multx100_int = 100; /* default for 80 nits otherwise */
+
+ hdr_mult = spl_fixpt_from_fraction((long long)hdr_multx100_int, 100LL);
c0_mult = spl_fixpt_from_fraction(2126LL, 10000LL);
c1_mult = spl_fixpt_from_fraction(7152LL, 10000LL);
c2_mult = spl_fixpt_from_fraction(722LL, 10000LL);
@@ -1191,7 +1214,7 @@ static void spl_calculate_c0_c3_hdr(struct dscl_prog_data *dscl_prog_data, uint3
static void spl_set_easf_data(struct spl_scratch *spl_scratch, struct spl_out *spl_out, bool enable_easf_v,
bool enable_easf_h, enum linear_light_scaling lls_pref,
enum spl_pixel_format format, enum system_setup setup,
- uint32_t hdr_multx100)
+ uint32_t sdr_white_level_nits)
{
struct dscl_prog_data *dscl_prog_data = spl_out->dscl_prog_data;
if (enable_easf_v) {
@@ -1499,7 +1522,7 @@ static void spl_set_easf_data(struct spl_scratch *spl_scratch, struct spl_out *s
dscl_prog_data->easf_ltonl_en = 1; // Linear input
if ((setup == HDR_L) && (spl_is_rgb8(format))) {
/* Calculate C0-C3 coefficients based on HDR multiplier */
- spl_calculate_c0_c3_hdr(dscl_prog_data, hdr_multx100);
+ spl_calculate_c0_c3_hdr(dscl_prog_data, sdr_white_level_nits);
} else { // HDR_L ( DWM ) and SDR_L
dscl_prog_data->easf_matrix_c0 =
0x4EF7; // fp1.5.10, C0 coefficient (LN_rec709: 0.2126 * (2^14)/125 = 27.86590720)
@@ -1557,7 +1580,7 @@ static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data,
struct adaptive_sharpness adp_sharpness, bool enable_isharp,
enum linear_light_scaling lls_pref, enum spl_pixel_format format,
const struct spl_scaler_data *data, struct spl_fixed31_32 ratio,
- enum system_setup setup)
+ enum system_setup setup, enum scale_to_sharpness_policy scale_to_sharpness_policy)
{
/* Turn off sharpener if not required */
if (!enable_isharp) {
@@ -1565,6 +1588,11 @@ static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data,
return;
}
+ spl_build_isharp_1dlut_from_reference_curve(ratio, setup, adp_sharpness,
+ scale_to_sharpness_policy);
+ dscl_prog_data->isharp_delta = spl_get_pregen_filter_isharp_1D_lut(setup);
+ dscl_prog_data->sharpness_level = adp_sharpness.sharpness_level;
+
dscl_prog_data->isharp_en = 1; // ISHARP_EN
// Set ISHARP_NOISEDET_MODE if htaps = 6-tap
if (data->taps.h_taps == 6) {
@@ -1662,11 +1690,6 @@ static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data,
dscl_prog_data->isharp_lba.base_seg[5] = 0; // ISHARP LBA PWL for Seg 5. BASE value in U0.6 format
}
-
- spl_build_isharp_1dlut_from_reference_curve(ratio, setup, adp_sharpness);
- dscl_prog_data->isharp_delta = spl_get_pregen_filter_isharp_1D_lut(setup);
- dscl_prog_data->sharpness_level = adp_sharpness.sharpness_level;
-
// Program the nldelta soft clip values
if (lls_pref == LLS_PREF_YES) {
dscl_prog_data->isharp_nldelta_sclip.enable_p = 0; /* ISHARP_NLDELTA_SCLIP_EN_P */
@@ -1750,7 +1773,7 @@ bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out)
// Set EASF
spl_set_easf_data(&spl_scratch, spl_out, enable_easf_v, enable_easf_h, spl_in->lls_pref,
- spl_in->basic_in.format, setup, spl_in->hdr_multx100);
+ spl_in->basic_in.format, setup, spl_in->sdr_white_level_nits);
// Set iSHARP
vratio = spl_fixpt_ceil(spl_scratch.scl_data.ratios.vert);
@@ -1761,7 +1784,8 @@ bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out)
isharp_scale_ratio = spl_scratch.scl_data.recip_ratios.horz;
spl_set_isharp_data(spl_out->dscl_prog_data, spl_in->adaptive_sharpness, enable_isharp,
- spl_in->lls_pref, spl_in->basic_in.format, data, isharp_scale_ratio, setup);
+ spl_in->lls_pref, spl_in->basic_in.format, data, isharp_scale_ratio, setup,
+ spl_in->debug.scale_to_sharpness_policy);
return res;
}
diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c
index 33712f50d303..e0572252c640 100644
--- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c
+++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c
@@ -500,6 +500,15 @@ struct isharp_1D_lut_pregen filter_isharp_1D_lut_pregen[NUM_SHARPNESS_SETUPS] =
},
};
+struct scale_ratio_to_sharpness_level_adj sharpness_level_adj[NUM_SHARPNESS_ADJ_LEVELS] = {
+ {1125, 1000, 0},
+ {11, 10, 1},
+ {1075, 1000, 2},
+ {105, 100, 3},
+ {1025, 1000, 4},
+ {1, 1, 5},
+};
+
const uint32_t *spl_get_filter_isharp_1D_lut_0(void)
{
return filter_isharp_1D_lut_0;
@@ -541,19 +550,72 @@ uint16_t *spl_get_filter_isharp_bs_3tap_64p(void)
return filter_isharp_bs_3tap_64p_s1_12;
}
-static unsigned int spl_calculate_sharpness_level(int discrete_sharpness_level, enum system_setup setup,
- struct spl_sharpness_range sharpness_range)
+static unsigned int spl_calculate_sharpness_level_adj(struct spl_fixed31_32 ratio)
+{
+ int j;
+ struct spl_fixed31_32 ratio_level;
+ struct scale_ratio_to_sharpness_level_adj *lookup_ptr;
+ unsigned int sharpness_level_down_adj;
+
+ /*
+ * Adjust sharpness level based on current scaling ratio
+ *
+ * We have 5 discrete scaling ratios which we will use to adjust the
+ * sharpness level down by 1 as we pass each ratio. The ratios
+ * are
+ *
+ * 1.125 upscale and higher - no adj
+ * 1.100 - under 1.125 - adj level down 1
+ * 1.075 - under 1.100 - adj level down 2
+ * 1.050 - under 1.075 - adj level down 3
+ * 1.025 - under 1.050 - adj level down 4
+ * 1.000 - under 1.025 - adj level down 5
+ *
+ */
+ j = 0;
+ sharpness_level_down_adj = 0;
+ lookup_ptr = sharpness_level_adj;
+ while (j < NUM_SHARPNESS_ADJ_LEVELS) {
+ ratio_level = spl_fixpt_from_fraction(lookup_ptr->ratio_numer,
+ lookup_ptr->ratio_denom);
+ if (ratio.value >= ratio_level.value) {
+ sharpness_level_down_adj = lookup_ptr->level_down_adj;
+ break;
+ }
+ lookup_ptr++;
+ j++;
+ }
+ return sharpness_level_down_adj;
+}
+
+static unsigned int spl_calculate_sharpness_level(struct spl_fixed31_32 ratio,
+ int discrete_sharpness_level, enum system_setup setup,
+ struct spl_sharpness_range sharpness_range,
+ enum scale_to_sharpness_policy scale_to_sharpness_policy)
{
unsigned int sharpness_level = 0;
+ unsigned int sharpness_level_down_adj = 0;
int min_sharpness, max_sharpness, mid_sharpness;
+ /*
+ * Adjust sharpness level if policy requires we adjust it based on
+ * scale ratio. Based on scale ratio, we may adjust the sharpness
+ * level down by a certain number of steps. We will not select
+ * a sharpness value of 0 so the lowest sharpness level will be
+ * 0 or 1 depending on what the min_sharpness is
+ *
+ * If the policy is no required, this code maybe removed at a later
+ * date
+ */
switch (setup) {
case HDR_L:
min_sharpness = sharpness_range.hdr_rgb_min;
max_sharpness = sharpness_range.hdr_rgb_max;
mid_sharpness = sharpness_range.hdr_rgb_mid;
+ if (scale_to_sharpness_policy == SCALE_TO_SHARPNESS_ADJ_ALL)
+ sharpness_level_down_adj = spl_calculate_sharpness_level_adj(ratio);
break;
case HDR_NL:
/* currently no use case, use Non-linear SDR values for now */
@@ -561,15 +623,26 @@ static unsigned int spl_calculate_sharpness_level(int discrete_sharpness_level,
min_sharpness = sharpness_range.sdr_yuv_min;
max_sharpness = sharpness_range.sdr_yuv_max;
mid_sharpness = sharpness_range.sdr_yuv_mid;
+ if (scale_to_sharpness_policy >= SCALE_TO_SHARPNESS_ADJ_YUV)
+ sharpness_level_down_adj = spl_calculate_sharpness_level_adj(ratio);
break;
case SDR_L:
default:
min_sharpness = sharpness_range.sdr_rgb_min;
max_sharpness = sharpness_range.sdr_rgb_max;
mid_sharpness = sharpness_range.sdr_rgb_mid;
+ if (scale_to_sharpness_policy == SCALE_TO_SHARPNESS_ADJ_ALL)
+ sharpness_level_down_adj = spl_calculate_sharpness_level_adj(ratio);
break;
}
+ if ((min_sharpness == 0) && (sharpness_level_down_adj >= discrete_sharpness_level))
+ discrete_sharpness_level = 1;
+ else if (sharpness_level_down_adj >= discrete_sharpness_level)
+ discrete_sharpness_level = 0;
+ else
+ discrete_sharpness_level -= sharpness_level_down_adj;
+
int lower_half_step_size = (mid_sharpness - min_sharpness) / 5;
int upper_half_step_size = (max_sharpness - mid_sharpness) / 5;
@@ -584,7 +657,7 @@ static unsigned int spl_calculate_sharpness_level(int discrete_sharpness_level,
}
void spl_build_isharp_1dlut_from_reference_curve(struct spl_fixed31_32 ratio, enum system_setup setup,
- struct adaptive_sharpness sharpness)
+ struct adaptive_sharpness sharpness, enum scale_to_sharpness_policy scale_to_sharpness_policy)
{
uint8_t *byte_ptr_1dlut_src, *byte_ptr_1dlut_dst;
struct spl_fixed31_32 sharp_base, sharp_calc, sharp_level;
@@ -594,8 +667,9 @@ void spl_build_isharp_1dlut_from_reference_curve(struct spl_fixed31_32 ratio, en
uint32_t filter_pregen_store[ISHARP_LUT_TABLE_SIZE];
/* Custom sharpnessX1000 value */
- unsigned int sharpnessX1000 = spl_calculate_sharpness_level(sharpness.sharpness_level,
- setup, sharpness.sharpness_range);
+ unsigned int sharpnessX1000 = spl_calculate_sharpness_level(ratio,
+ sharpness.sharpness_level, setup,
+ sharpness.sharpness_range, scale_to_sharpness_policy);
sharp_level = spl_fixpt_from_fraction(sharpnessX1000, 1000);
/*
@@ -606,7 +680,6 @@ void spl_build_isharp_1dlut_from_reference_curve(struct spl_fixed31_32 ratio, en
(filter_isharp_1D_lut_pregen[setup].sharpness_denom == 1000))
return;
-
/*
* Calculate LUT_128_gained with this equation:
*
diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h
index fe0b12571f2c..afcc66206ca2 100644
--- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h
+++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h
@@ -20,11 +20,11 @@ uint16_t *spl_get_filter_isharp_bs_3tap_64p(void);
const uint16_t *spl_get_filter_isharp_wide_6tap_64p(void);
uint16_t *spl_dscl_get_blur_scale_coeffs_64p(int taps);
-struct scale_ratio_to_sharpness_level_lookup {
+#define NUM_SHARPNESS_ADJ_LEVELS 6
+struct scale_ratio_to_sharpness_level_adj {
unsigned int ratio_numer;
unsigned int ratio_denom;
- unsigned int sharpness_numer;
- unsigned int sharpness_denom;
+ unsigned int level_down_adj; /* adjust sharpness level down */
};
struct isharp_1D_lut_pregen {
@@ -45,6 +45,7 @@ void spl_init_blur_scale_coeffs(void);
void spl_set_blur_scale_data(struct dscl_prog_data *dscl_prog_data,
const struct spl_scaler_data *data);
-void spl_build_isharp_1dlut_from_reference_curve(struct spl_fixed31_32 ratio, enum system_setup setup, struct adaptive_sharpness sharpness);
+void spl_build_isharp_1dlut_from_reference_curve(struct spl_fixed31_32 ratio, enum system_setup setup,
+ struct adaptive_sharpness sharpness, enum scale_to_sharpness_policy scale_to_sharpness_policy);
uint32_t *spl_get_pregen_filter_isharp_1D_lut(enum system_setup setup);
#endif /* __DC_SPL_ISHARP_FILTERS_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h
index 85b19ebe2c57..2a74ff5fdfdb 100644
--- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h
+++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h
@@ -487,6 +487,17 @@ enum linear_light_scaling { // convert it in translation logic
LLS_PREF_YES,
LLS_PREF_NO
};
+enum sharpen_policy {
+ SHARPEN_ALWAYS = 0,
+ SHARPEN_YUV = 1,
+ SHARPEN_RGB_FULLSCREEN_YUV = 2,
+ SHARPEN_FULLSCREEN_ALL = 3
+};
+enum scale_to_sharpness_policy {
+ NO_SCALE_TO_SHARPNESS_ADJ = 0,
+ SCALE_TO_SHARPNESS_ADJ_YUV = 1,
+ SCALE_TO_SHARPNESS_ADJ_ALL = 2
+};
struct spl_funcs {
void (*spl_calc_lb_num_partitions)
(bool alpha_en,
@@ -499,6 +510,8 @@ struct spl_funcs {
struct spl_debug {
int visual_confirm_base_offset;
int visual_confirm_dpp_offset;
+ enum sharpen_policy sharpen_policy;
+ enum scale_to_sharpness_policy scale_to_sharpness_policy;
};
struct spl_in {
@@ -518,7 +531,7 @@ struct spl_in {
bool is_hdr_on;
int h_active;
int v_active;
- int hdr_multx100;
+ int sdr_white_level_nits;
};
// end of SPL inputs
diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
index cd70453aeae0..fe5b6f7a3eb1 100644
--- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
+++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
@@ -300,6 +300,7 @@ struct dmub_srv_hw_params {
enum dmub_ips_disable_type disable_ips;
bool disallow_phy_access;
bool disable_sldo_opt;
+ bool enable_non_transparent_setconfig;
};
/**
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
index e20c220aa8b4..ebcf68bfae2b 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
@@ -682,7 +682,7 @@ union dmub_fw_boot_options {
uint32_t gpint_scratch8: 1; /* 1 if GPINT is in scratch8*/
uint32_t usb4_cm_version: 1; /**< 1 CM support */
uint32_t dpia_hpd_int_enable_supported: 1; /* 1 if dpia hpd int enable supported */
- uint32_t reserved0: 1;
+ uint32_t enable_non_transparent_setconfig: 1; /* 1 if dpia use conventional dp lt flow*/
uint32_t disable_clk_ds: 1; /* 1 if disallow dispclk_ds and dppclk_ds*/
uint32_t disable_timeout_recovery : 1; /* 1 if timeout recovery should be disabled */
uint32_t ips_pg_disable: 1; /* 1 to disable ONO domains power gating*/
@@ -1308,6 +1308,7 @@ enum dmub_cmd_dpia_type {
DMUB_CMD__DPIA_DIG1_DPIA_CONTROL = 0,
DMUB_CMD__DPIA_SET_CONFIG_ACCESS = 1,
DMUB_CMD__DPIA_MST_ALLOC_SLOTS = 2,
+ DMUB_CMD__DPIA_SET_TPS_NOTIFICATION = 3,
};
/* DMUB_OUT_CMD__DPIA_NOTIFICATION command types. */
@@ -2139,6 +2140,24 @@ struct dmub_rb_cmd_set_mst_alloc_slots {
};
/**
+ * Data passed from driver to FW in a DMUB_CMD__SET_TPS_NOTIFICATION command.
+ */
+struct dmub_cmd_tps_notification_data {
+ uint8_t instance; /* DPIA instance */
+ uint8_t tps; /* requested training pattern */
+ uint8_t reserved1;
+ uint8_t reserved2;
+};
+
+/**
+ * DMUB command structure for SET_TPS_NOTIFICATION command.
+ */
+struct dmub_rb_cmd_set_tps_notification {
+ struct dmub_cmd_header header; /* header */
+ struct dmub_cmd_tps_notification_data tps_notification; /* set tps_notification data */
+};
+
+/**
* DMUB command structure for DPIA HPD int enable control.
*/
struct dmub_rb_cmd_dpia_hpd_int_enable {
@@ -5305,6 +5324,10 @@ union dmub_rb_cmd {
*/
struct dmub_rb_cmd_set_mst_alloc_slots set_mst_alloc_slots;
/**
+ * Definition of a DMUB_CMD__DPIA_SET_TPS_NOTIFICATION command.
+ */
+ struct dmub_rb_cmd_set_tps_notification set_tps_notification;
+ /**
* Definition of a DMUB_CMD__EDID_CEA command.
*/
struct dmub_rb_cmd_edid_cea edid_cea;
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c
index 746696b6f09a..2ccad79053c5 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c
@@ -425,6 +425,7 @@ void dmub_dcn35_enable_dmub_boot_options(struct dmub_srv *dmub, const struct dmu
boot_options.bits.ips_disable = params->disable_ips;
boot_options.bits.ips_sequential_ono = params->ips_sequential_ono;
boot_options.bits.disable_sldo_opt = params->disable_sldo_opt;
+ boot_options.bits.enable_non_transparent_setconfig = params->enable_non_transparent_setconfig;
REG_WRITE(DMCUB_SCRATCH14, boot_options.all);
}
diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
index a40e6590215a..bbd259cea4f4 100644
--- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
+++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
@@ -134,7 +134,7 @@ unsigned int mod_freesync_calc_v_total_from_refresh(
v_total = div64_u64(div64_u64(((unsigned long long)(
frame_duration_in_ns) * (stream->timing.pix_clk_100hz / 10)),
- stream->timing.h_total), 1000000);
+ stream->timing.h_total) + 500000, 1000000);
/* v_total cannot be less than nominal */
if (v_total < stream->timing.v_total) {
diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h
index 745fd052840d..3f91926a50e9 100644
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -85,7 +85,7 @@ enum amd_apu_flags {
* @AMD_IP_BLOCK_TYPE_MES: Micro-Engine Scheduler
* @AMD_IP_BLOCK_TYPE_JPEG: JPEG Engine
* @AMD_IP_BLOCK_TYPE_VPE: Video Processing Engine
-* @AMD_IP_BLOCK_TYPE_UMSCH_MM: User Mode Schduler for Multimedia
+* @AMD_IP_BLOCK_TYPE_UMSCH_MM: User Mode Scheduler for Multimedia
* @AMD_IP_BLOCK_TYPE_ISP: Image Signal Processor
* @AMD_IP_BLOCK_TYPE_NUM: Total number of IP block types
*/
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 7744ca3ef4b1..e3e635a31b8a 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -71,6 +71,11 @@ enum kgd_memory_pool {
KGD_POOL_FRAMEBUFFER = 3,
};
+struct kfd_cu_occupancy {
+ u32 wave_cnt;
+ u32 doorbell_off;
+};
+
/**
* enum kfd_sched_policy
*
@@ -313,8 +318,9 @@ struct kfd2kgd_calls {
uint32_t grace_period,
uint32_t *reg_offset,
uint32_t *reg_data);
- void (*get_cu_occupancy)(struct amdgpu_device *adev, int pasid,
- int *wave_cnt, int *max_waves_per_cu, uint32_t inst);
+ void (*get_cu_occupancy)(struct amdgpu_device *adev,
+ struct kfd_cu_occupancy *cu_occupancy,
+ int *max_waves_per_cu, uint32_t inst);
void (*program_trap_handler_settings)(struct amdgpu_device *adev,
uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr,
uint32_t inst);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h
index 0b3c2f54a343..822c6425d90e 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h
@@ -123,7 +123,7 @@ typedef enum {
VOLTAGE_GUARDBAND_COUNT
} GFX_GUARDBAND_e;
-#define SMU_METRICS_TABLE_VERSION 0xC
+#define SMU_METRICS_TABLE_VERSION 0xD
typedef struct __attribute__((packed, aligned(4))) {
uint32_t AccumulationCounter;
@@ -227,6 +227,10 @@ typedef struct __attribute__((packed, aligned(4))) {
// PCIE LINK Speed and width
uint32_t PCIeLinkSpeed;
uint32_t PCIeLinkWidth;
+
+ // PER XCD ACTIVITY
+ uint32_t GfxBusy[8];
+ uint64_t GfxBusyAcc[8];
} MetricsTableX_t;
typedef struct __attribute__((packed, aligned(4))) {
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index a887ab945dfa..1d024b122b0c 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -2569,10 +2569,14 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu,
}
}
- return smu_cmn_send_smc_msg_with_param(smu,
+ ret = smu_cmn_send_smc_msg_with_param(smu,
SMU_MSG_SetWorkloadMask,
workload_mask,
NULL);
+ if (!ret)
+ smu->workload_mask = workload_mask;
+
+ return ret;
}
static bool smu_v13_0_0_is_mode1_reset_supported(struct smu_context *smu)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 9974c9f8135e..55ed6247eb61 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -2107,8 +2107,12 @@ static int smu_v13_0_6_i2c_xfer(struct i2c_adapter *i2c_adap,
}
mutex_lock(&adev->pm.mutex);
r = smu_v13_0_6_request_i2c_xfer(smu, req);
- if (r)
- goto fail;
+ if (r) {
+ /* Retry once, in case of an i2c collision */
+ r = smu_v13_0_6_request_i2c_xfer(smu, req);
+ if (r)
+ goto fail;
+ }
for (c = i = 0; i < num_msgs; i++) {
if (!(msg[i].flags & I2C_M_RD)) {
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
index 7bc95c404377..b891a5e0a396 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -2501,8 +2501,11 @@ static int smu_v13_0_7_set_power_profile_mode(struct smu_context *smu, long *inp
return -EINVAL;
ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask,
1 << workload_type, NULL);
+
if (ret)
dev_err(smu->adev->dev, "[%s] Failed to set work load mask!", __func__);
+ else
+ smu->workload_mask = (1 << workload_type);
return ret;
}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
index 43820d7d2c54..5899d01fa73d 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
@@ -1861,10 +1861,14 @@ static int smu_v14_0_2_set_power_profile_mode(struct smu_context *smu,
if (workload_type < 0)
return -EINVAL;
- return smu_cmn_send_smc_msg_with_param(smu,
+ ret = smu_cmn_send_smc_msg_with_param(smu,
SMU_MSG_SetWorkloadMask,
1 << workload_type,
NULL);
+ if (!ret)
+ smu->workload_mask = 1 << workload_type;
+
+ return ret;
}
static int smu_v14_0_2_baco_enter(struct smu_context *smu)
diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c
index 07e493d14d0c..ad1dc638c83b 100644
--- a/drivers/gpu/drm/drm_file.c
+++ b/drivers/gpu/drm/drm_file.c
@@ -103,7 +103,6 @@ bool drm_dev_needs_global_mutex(struct drm_device *dev)
* .compat_ioctl = drm_compat_ioctl, // NULL if CONFIG_COMPAT=n
* .poll = drm_poll,
* .read = drm_read,
- * .llseek = no_llseek,
* .mmap = drm_gem_mmap,
* };
*
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index 00fbe9f8c03a..b1c294236cc8 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -916,7 +916,7 @@ intel_ddi_main_link_aux_domain(struct intel_digital_port *dig_port,
* instead of a specific AUX_IO_<port> reference without powering up any
* extra wells.
*/
- if (intel_encoder_can_psr(&dig_port->base))
+ if (intel_psr_needs_aux_io_power(&dig_port->base, crtc_state))
return intel_display_power_aux_io_domain(i915, dig_port->aux_ch);
else if (DISPLAY_VER(i915) < 14 &&
(intel_crtc_has_dp_encoder(crtc_state) ||
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index a1fcedfd404b..90fa73575feb 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -531,6 +531,10 @@ static void
intel_dp_set_source_rates(struct intel_dp *intel_dp)
{
/* The values must be in increasing order */
+ static const int bmg_rates[] = {
+ 162000, 216000, 243000, 270000, 324000, 432000, 540000, 675000,
+ 810000, 1000000, 1350000,
+ };
static const int mtl_rates[] = {
162000, 216000, 243000, 270000, 324000, 432000, 540000, 675000,
810000, 1000000, 2000000,
@@ -561,8 +565,13 @@ intel_dp_set_source_rates(struct intel_dp *intel_dp)
intel_dp->source_rates || intel_dp->num_source_rates);
if (DISPLAY_VER(dev_priv) >= 14) {
- source_rates = mtl_rates;
- size = ARRAY_SIZE(mtl_rates);
+ if (IS_BATTLEMAGE(dev_priv)) {
+ source_rates = bmg_rates;
+ size = ARRAY_SIZE(bmg_rates);
+ } else {
+ source_rates = mtl_rates;
+ size = ARRAY_SIZE(mtl_rates);
+ }
max_rate = mtl_max_source_rate(intel_dp);
} else if (DISPLAY_VER(dev_priv) >= 11) {
source_rates = icl_rates;
@@ -4058,6 +4067,9 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp, struct intel_connector *connector
drm_dp_is_branch(intel_dp->dpcd));
intel_init_dpcd_quirks(intel_dp, &intel_dp->desc.ident);
+ intel_dp->colorimetry_support =
+ intel_dp_get_colorimetry_status(intel_dp);
+
/*
* Read the eDP display control registers.
*
@@ -4171,6 +4183,9 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp)
intel_init_dpcd_quirks(intel_dp, &intel_dp->desc.ident);
+ intel_dp->colorimetry_support =
+ intel_dp_get_colorimetry_status(intel_dp);
+
intel_dp_update_sink_caps(intel_dp);
}
@@ -6922,9 +6937,6 @@ intel_dp_init_connector(struct intel_digital_port *dig_port,
"HDCP init failed, skipping.\n");
}
- intel_dp->colorimetry_support =
- intel_dp_get_colorimetry_status(intel_dp);
-
intel_dp->frl.is_trained = false;
intel_dp->frl.trained_rate_gbps = 0;
diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
index 1f83b3b67ea6..136a0d6ca970 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -203,6 +203,25 @@ bool intel_encoder_can_psr(struct intel_encoder *encoder)
return false;
}
+bool intel_psr_needs_aux_io_power(struct intel_encoder *encoder,
+ const struct intel_crtc_state *crtc_state)
+{
+ /*
+ * For PSR/PR modes only eDP requires the AUX IO power to be enabled whenever
+ * the output is enabled. For non-eDP outputs the main link is always
+ * on, hence it doesn't require the HW initiated AUX wake-up signaling used
+ * for eDP.
+ *
+ * TODO:
+ * - Consider leaving AUX IO disabled for eDP / PR as well, in case
+ * the ALPM with main-link off mode is not enabled.
+ * - Leave AUX IO enabled for DP / PR, once support for ALPM with
+ * main-link off mode is added for it and this mode gets enabled.
+ */
+ return intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP) &&
+ intel_encoder_can_psr(encoder);
+}
+
static bool psr_global_enabled(struct intel_dp *intel_dp)
{
struct intel_display *display = to_intel_display(intel_dp);
@@ -2784,13 +2803,6 @@ static int _psr1_ready_for_pipe_update_locked(struct intel_dp *intel_dp)
EDP_PSR_STATUS_STATE_MASK, 50);
}
-static int _panel_replay_ready_for_pipe_update_locked(struct intel_dp *intel_dp)
-{
- return intel_dp_is_edp(intel_dp) ?
- _psr2_ready_for_pipe_update_locked(intel_dp) :
- _psr1_ready_for_pipe_update_locked(intel_dp);
-}
-
/**
* intel_psr_wait_for_idle_locked - wait for PSR be ready for a pipe update
* @new_crtc_state: new CRTC state
@@ -2813,12 +2825,10 @@ void intel_psr_wait_for_idle_locked(const struct intel_crtc_state *new_crtc_stat
lockdep_assert_held(&intel_dp->psr.lock);
- if (!intel_dp->psr.enabled)
+ if (!intel_dp->psr.enabled || intel_dp->psr.panel_replay_enabled)
continue;
- if (intel_dp->psr.panel_replay_enabled)
- ret = _panel_replay_ready_for_pipe_update_locked(intel_dp);
- else if (intel_dp->psr.sel_update_enabled)
+ if (intel_dp->psr.sel_update_enabled)
ret = _psr2_ready_for_pipe_update_locked(intel_dp);
else
ret = _psr1_ready_for_pipe_update_locked(intel_dp);
diff --git a/drivers/gpu/drm/i915/display/intel_psr.h b/drivers/gpu/drm/i915/display/intel_psr.h
index 4e09c10908e4..6eb5f15f674f 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.h
+++ b/drivers/gpu/drm/i915/display/intel_psr.h
@@ -25,6 +25,8 @@ struct intel_plane_state;
(intel_dp)->psr.source_panel_replay_support)
bool intel_encoder_can_psr(struct intel_encoder *encoder);
+bool intel_psr_needs_aux_io_power(struct intel_encoder *encoder,
+ const struct intel_crtc_state *crtc_state);
void intel_psr_init_dpcd(struct intel_dp *intel_dp);
void intel_psr_enable_sink(struct intel_dp *intel_dp,
const struct intel_crtc_state *crtc_state);
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 025a79fe5920..2406cda75b7b 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -3751,7 +3751,6 @@ static int i915_perf_release(struct inode *inode, struct file *file)
static const struct file_operations fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.release = i915_perf_release,
.poll = i915_perf_poll,
.read = i915_perf_read,
diff --git a/drivers/gpu/drm/msm/msm_perf.c b/drivers/gpu/drm/msm/msm_perf.c
index 3d3da79fec2a..d3c7889aaf26 100644
--- a/drivers/gpu/drm/msm/msm_perf.c
+++ b/drivers/gpu/drm/msm/msm_perf.c
@@ -192,7 +192,6 @@ static const struct file_operations perf_debugfs_fops = {
.owner = THIS_MODULE,
.open = perf_open,
.read = perf_read,
- .llseek = no_llseek,
.release = perf_release,
};
diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c
index ca44fd291c5b..39138e190cb9 100644
--- a/drivers/gpu/drm/msm/msm_rd.c
+++ b/drivers/gpu/drm/msm/msm_rd.c
@@ -227,7 +227,6 @@ static const struct file_operations rd_debugfs_fops = {
.owner = THIS_MODULE,
.open = rd_open,
.read = rd_read,
- .llseek = no_llseek,
.release = rd_release,
};
diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c
index a13e0b3a169e..ef777dbdf4ec 100644
--- a/drivers/gpu/drm/xe/xe_bb.c
+++ b/drivers/gpu/drm/xe/xe_bb.c
@@ -65,7 +65,8 @@ __xe_bb_create_job(struct xe_exec_queue *q, struct xe_bb *bb, u64 *addr)
{
u32 size = drm_suballoc_size(bb->bo);
- bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+ if (bb->len == 0 || bb->cs[bb->len - 1] != MI_BATCH_BUFFER_END)
+ bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
xe_gt_assert(q->gt, bb->len * 4 + bb_prefetch(q->gt) <= size);
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 06911e9a3bf5..f379df3a12bf 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -2320,6 +2320,20 @@ void xe_bo_put_commit(struct llist_head *deferred)
drm_gem_object_free(&bo->ttm.base.refcount);
}
+void xe_bo_put(struct xe_bo *bo)
+{
+ might_sleep();
+ if (bo) {
+#ifdef CONFIG_PROC_FS
+ if (bo->client)
+ might_lock(&bo->client->bos_lock);
+#endif
+ if (bo->ggtt_node && bo->ggtt_node->ggtt)
+ might_lock(&bo->ggtt_node->ggtt->lock);
+ drm_gem_object_put(&bo->ttm.base);
+ }
+}
+
/**
* xe_bo_dumb_create - Create a dumb bo as backing for a fb
* @file_priv: ...
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index dbfb3209615d..6e4be52306df 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -126,11 +126,7 @@ static inline struct xe_bo *xe_bo_get(struct xe_bo *bo)
return bo;
}
-static inline void xe_bo_put(struct xe_bo *bo)
-{
- if (bo)
- drm_gem_object_put(&bo->ttm.base);
-}
+void xe_bo_put(struct xe_bo *bo);
static inline void __xe_bo_unset_bulk_move(struct xe_bo *bo)
{
diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c
index 95a05c5bc897..c4add8b38bbd 100644
--- a/drivers/gpu/drm/xe/xe_drm_client.c
+++ b/drivers/gpu/drm/xe/xe_drm_client.c
@@ -168,15 +168,10 @@ static void bo_meminfo(struct xe_bo *bo,
struct drm_memory_stats stats[TTM_NUM_MEM_TYPES])
{
u64 sz = bo->size;
- u32 mem_type;
+ u32 mem_type = bo->ttm.resource->mem_type;
xe_bo_assert_held(bo);
- if (bo->placement.placement)
- mem_type = bo->placement.placement->mem_type;
- else
- mem_type = XE_PL_TT;
-
if (drm_gem_object_is_shared_for_memory_stats(&bo->ttm.base))
stats[mem_type].shared += sz;
else
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index 730eec07795e..00af059a8971 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -212,6 +212,12 @@ static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf)
* TODO: Change to read lock? Using write lock for simplicity.
*/
down_write(&vm->lock);
+
+ if (xe_vm_is_closed(vm)) {
+ err = -ENOENT;
+ goto unlock_vm;
+ }
+
vma = lookup_vma(vm, pf->page_addr);
if (!vma) {
err = -EINVAL;
diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h
index c3e6b51f7a09..42116b167c98 100644
--- a/drivers/gpu/drm/xe/xe_guc.h
+++ b/drivers/gpu/drm/xe/xe_guc.h
@@ -18,8 +18,10 @@
*/
#define MAKE_GUC_VER(maj, min, pat) (((maj) << 16) | ((min) << 8) | (pat))
#define MAKE_GUC_VER_STRUCT(ver) MAKE_GUC_VER((ver).major, (ver).minor, (ver).patch)
-#define GUC_SUBMIT_VER(guc) MAKE_VER_STRUCT((guc)->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY])
-#define GUC_FIRMWARE_VER(guc) MAKE_VER_STRUCT((guc)->fw.versions.found[XE_UC_FW_VER_RELEASE])
+#define GUC_SUBMIT_VER(guc) \
+ MAKE_GUC_VER_STRUCT((guc)->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY])
+#define GUC_FIRMWARE_VER(guc) \
+ MAKE_GUC_VER_STRUCT((guc)->fw.versions.found[XE_UC_FW_VER_RELEASE])
struct drm_printer;
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index 0369cc016f6a..eae38a49ee8e 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -1263,7 +1263,6 @@ static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma)
static const struct file_operations xe_oa_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.release = xe_oa_release,
.poll = xe_oa_poll,
.read = xe_oa_read,
diff --git a/drivers/gpu/drm/xe/xe_vram.c b/drivers/gpu/drm/xe/xe_vram.c
index 5bcd59190353..80ba2fc78837 100644
--- a/drivers/gpu/drm/xe/xe_vram.c
+++ b/drivers/gpu/drm/xe/xe_vram.c
@@ -182,6 +182,7 @@ static inline u64 get_flat_ccs_offset(struct xe_gt *gt, u64 tile_size)
offset = offset_hi << 32; /* HW view bits 39:32 */
offset |= offset_lo << 6; /* HW view bits 31:6 */
offset *= num_enabled; /* convert to SW view */
+ offset = round_up(offset, SZ_128K); /* SW must round up to nearest 128K */
/* We don't expect any holes */
xe_assert_msg(xe, offset == (xe_mmio_read64_2x32(gt, GSMBASE) - ccs_size),
diff --git a/drivers/hid/uhid.c b/drivers/hid/uhid.c
index a54c7995b9be..21a70420151e 100644
--- a/drivers/hid/uhid.c
+++ b/drivers/hid/uhid.c
@@ -803,7 +803,6 @@ static const struct file_operations uhid_fops = {
.read = uhid_char_read,
.write = uhid_char_write,
.poll = uhid_char_poll,
- .llseek = no_llseek,
};
static struct miscdevice uhid_misc = {
diff --git a/drivers/hwmon/asus_atk0110.c b/drivers/hwmon/asus_atk0110.c
index 3751c1e3eddd..1dc7e24fe4c5 100644
--- a/drivers/hwmon/asus_atk0110.c
+++ b/drivers/hwmon/asus_atk0110.c
@@ -783,7 +783,6 @@ static const struct file_operations atk_debugfs_ggrp_fops = {
.read = atk_debugfs_ggrp_read,
.open = atk_debugfs_ggrp_open,
.release = atk_debugfs_ggrp_release,
- .llseek = no_llseek,
};
static void atk_debugfs_init(struct atk_data *data)
diff --git a/drivers/hwmon/fschmd.c b/drivers/hwmon/fschmd.c
index 1811f84d835e..a303959879ef 100644
--- a/drivers/hwmon/fschmd.c
+++ b/drivers/hwmon/fschmd.c
@@ -948,7 +948,6 @@ static long watchdog_ioctl(struct file *filp, unsigned int cmd,
static const struct file_operations watchdog_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.open = watchdog_open,
.release = watchdog_release,
.write = watchdog_write,
diff --git a/drivers/hwmon/w83793.c b/drivers/hwmon/w83793.c
index 0acf6bd0227f..67728f60333f 100644
--- a/drivers/hwmon/w83793.c
+++ b/drivers/hwmon/w83793.c
@@ -1451,7 +1451,6 @@ static long watchdog_ioctl(struct file *filp, unsigned int cmd,
static const struct file_operations watchdog_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.open = watchdog_open,
.release = watchdog_close,
.write = watchdog_write,
diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c
index 7edd3f1d0d46..aea9ac9c4bd0 100644
--- a/drivers/hwtracing/coresight/coresight-etb10.c
+++ b/drivers/hwtracing/coresight/coresight-etb10.c
@@ -652,7 +652,6 @@ static const struct file_operations etb_fops = {
.open = etb_open,
.read = etb_read,
.release = etb_release,
- .llseek = no_llseek,
};
static struct attribute *coresight_etb_mgmt_attrs[] = {
diff --git a/drivers/hwtracing/coresight/coresight-tmc-core.c b/drivers/hwtracing/coresight/coresight-tmc-core.c
index b54562f392f3..3a482fd2cb22 100644
--- a/drivers/hwtracing/coresight/coresight-tmc-core.c
+++ b/drivers/hwtracing/coresight/coresight-tmc-core.c
@@ -220,7 +220,6 @@ static const struct file_operations tmc_fops = {
.open = tmc_open,
.read = tmc_read,
.release = tmc_release,
- .llseek = no_llseek,
};
static enum tmc_mem_intf_width tmc_get_memwidth(u32 devid)
diff --git a/drivers/hwtracing/coresight/ultrasoc-smb.c b/drivers/hwtracing/coresight/ultrasoc-smb.c
index f9ebf20c91e6..ef7f560f0ffa 100644
--- a/drivers/hwtracing/coresight/ultrasoc-smb.c
+++ b/drivers/hwtracing/coresight/ultrasoc-smb.c
@@ -163,7 +163,6 @@ static const struct file_operations smb_fops = {
.open = smb_open,
.read = smb_read,
.release = smb_release,
- .llseek = no_llseek,
};
static ssize_t buf_size_show(struct device *dev, struct device_attribute *attr,
diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c
index be63d5b8f193..66123d684ac9 100644
--- a/drivers/hwtracing/intel_th/msu.c
+++ b/drivers/hwtracing/intel_th/msu.c
@@ -1677,7 +1677,6 @@ static const struct file_operations intel_th_msc_fops = {
.release = intel_th_msc_release,
.read = intel_th_msc_read,
.mmap = intel_th_msc_mmap,
- .llseek = no_llseek,
.owner = THIS_MODULE,
};
diff --git a/drivers/hwtracing/stm/core.c b/drivers/hwtracing/stm/core.c
index ccf39a80dc4f..cdba4e875b28 100644
--- a/drivers/hwtracing/stm/core.c
+++ b/drivers/hwtracing/stm/core.c
@@ -839,7 +839,6 @@ static const struct file_operations stm_fops = {
.mmap = stm_char_mmap,
.unlocked_ioctl = stm_char_ioctl,
.compat_ioctl = compat_ptr_ioctl,
- .llseek = no_llseek,
};
static void stm_device_release(struct device *dev)
diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c
index f4fb212b7f39..61f7c4003d2f 100644
--- a/drivers/i2c/i2c-dev.c
+++ b/drivers/i2c/i2c-dev.c
@@ -637,7 +637,6 @@ static int i2cdev_release(struct inode *inode, struct file *file)
static const struct file_operations i2cdev_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.read = i2cdev_read,
.write = i2cdev_write,
.unlocked_ioctl = i2cdev_ioctl,
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 9457e34b9e32..67aebfe0fed6 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -121,6 +121,12 @@ static unsigned int mwait_substates __initdata;
#define CPUIDLE_FLAG_INIT_XSTATE BIT(17)
/*
+ * Ignore the sub-state when matching mwait hints between the ACPI _CST and
+ * custom tables.
+ */
+#define CPUIDLE_FLAG_PARTIAL_HINT_MATCH BIT(18)
+
+/*
* MWAIT takes an 8-bit "hint" in EAX "suggesting"
* the C-state (top nibble) and sub-state (bottom nibble)
* 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
@@ -1043,7 +1049,8 @@ static struct cpuidle_state gnr_cstates[] __initdata = {
.name = "C6",
.desc = "MWAIT 0x20",
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED |
- CPUIDLE_FLAG_INIT_XSTATE,
+ CPUIDLE_FLAG_INIT_XSTATE |
+ CPUIDLE_FLAG_PARTIAL_HINT_MATCH,
.exit_latency = 170,
.target_residency = 650,
.enter = &intel_idle,
@@ -1052,7 +1059,8 @@ static struct cpuidle_state gnr_cstates[] __initdata = {
.name = "C6P",
.desc = "MWAIT 0x21",
.flags = MWAIT2flg(0x21) | CPUIDLE_FLAG_TLB_FLUSHED |
- CPUIDLE_FLAG_INIT_XSTATE,
+ CPUIDLE_FLAG_INIT_XSTATE |
+ CPUIDLE_FLAG_PARTIAL_HINT_MATCH,
.exit_latency = 210,
.target_residency = 1000,
.enter = &intel_idle,
@@ -1354,7 +1362,8 @@ static struct cpuidle_state srf_cstates[] __initdata = {
{
.name = "C6S",
.desc = "MWAIT 0x22",
- .flags = MWAIT2flg(0x22) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x22) | CPUIDLE_FLAG_TLB_FLUSHED |
+ CPUIDLE_FLAG_PARTIAL_HINT_MATCH,
.exit_latency = 270,
.target_residency = 700,
.enter = &intel_idle,
@@ -1362,7 +1371,8 @@ static struct cpuidle_state srf_cstates[] __initdata = {
{
.name = "C6SP",
.desc = "MWAIT 0x23",
- .flags = MWAIT2flg(0x23) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x23) | CPUIDLE_FLAG_TLB_FLUSHED |
+ CPUIDLE_FLAG_PARTIAL_HINT_MATCH,
.exit_latency = 310,
.target_residency = 900,
.enter = &intel_idle,
@@ -1744,7 +1754,7 @@ static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv)
}
}
-static bool __init intel_idle_off_by_default(u32 mwait_hint)
+static bool __init intel_idle_off_by_default(unsigned int flags, u32 mwait_hint)
{
int cstate, limit;
@@ -1761,7 +1771,15 @@ static bool __init intel_idle_off_by_default(u32 mwait_hint)
* the interesting states are ACPI_CSTATE_FFH.
*/
for (cstate = 1; cstate < limit; cstate++) {
- if (acpi_state_table.states[cstate].address == mwait_hint)
+ u32 acpi_hint = acpi_state_table.states[cstate].address;
+ u32 table_hint = mwait_hint;
+
+ if (flags & CPUIDLE_FLAG_PARTIAL_HINT_MATCH) {
+ acpi_hint &= ~MWAIT_SUBSTATE_MASK;
+ table_hint &= ~MWAIT_SUBSTATE_MASK;
+ }
+
+ if (acpi_hint == table_hint)
return false;
}
return true;
@@ -1771,7 +1789,10 @@ static bool __init intel_idle_off_by_default(u32 mwait_hint)
static inline bool intel_idle_acpi_cst_extract(void) { return false; }
static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { }
-static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; }
+static inline bool intel_idle_off_by_default(unsigned int flags, u32 mwait_hint)
+{
+ return false;
+}
#endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */
/**
@@ -2098,7 +2119,7 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
if ((disabled_states_mask & BIT(drv->state_count)) ||
((icpu->use_acpi || force_use_acpi) &&
- intel_idle_off_by_default(mwait_hint) &&
+ intel_idle_off_by_default(state->flags, mwait_hint) &&
!(state->flags & CPUIDLE_FLAG_ALWAYS_ENABLE)))
state->flags |= CPUIDLE_FLAG_OFF;
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index dc57d07a1f45..5dbb248e9625 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -1817,7 +1817,6 @@ static const struct file_operations ucma_fops = {
.release = ucma_close,
.write = ucma_write,
.poll = ucma_poll,
- .llseek = no_llseek,
};
static struct miscdevice ucma_misc = {
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index f760dfffa188..fd67fc9fe85a 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -1082,7 +1082,6 @@ static const struct file_operations umad_fops = {
#endif
.open = ib_umad_open,
.release = ib_umad_close,
- .llseek = no_llseek,
};
static int ib_umad_sm_open(struct inode *inode, struct file *filp)
@@ -1150,7 +1149,6 @@ static const struct file_operations umad_sm_fops = {
.owner = THIS_MODULE,
.open = ib_umad_sm_open,
.release = ib_umad_sm_close,
- .llseek = no_llseek,
};
static struct ib_umad_port *get_port(struct ib_device *ibdev,
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index bc099287de9a..94454186ed81 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -353,7 +353,6 @@ const struct file_operations uverbs_event_fops = {
.poll = ib_uverbs_comp_event_poll,
.release = uverbs_uobject_fd_release,
.fasync = ib_uverbs_comp_event_fasync,
- .llseek = no_llseek,
};
const struct file_operations uverbs_async_event_fops = {
@@ -362,7 +361,6 @@ const struct file_operations uverbs_async_event_fops = {
.poll = ib_uverbs_async_event_poll,
.release = uverbs_async_event_release,
.fasync = ib_uverbs_async_event_fasync,
- .llseek = no_llseek,
};
void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
@@ -991,7 +989,6 @@ static const struct file_operations uverbs_fops = {
.write = ib_uverbs_write,
.open = ib_uverbs_open,
.release = ib_uverbs_close,
- .llseek = no_llseek,
.unlocked_ioctl = ib_uverbs_ioctl,
.compat_ioctl = compat_ptr_ioctl,
};
@@ -1002,7 +999,6 @@ static const struct file_operations uverbs_mmap_fops = {
.mmap = ib_uverbs_mmap,
.open = ib_uverbs_open,
.release = ib_uverbs_close,
- .llseek = no_llseek,
.unlocked_ioctl = ib_uverbs_ioctl,
.compat_ioctl = compat_ptr_ioctl,
};
diff --git a/drivers/infiniband/hw/hfi1/fault.c b/drivers/infiniband/hw/hfi1/fault.c
index 35d2382ee618..ec9ee59fcf0c 100644
--- a/drivers/infiniband/hw/hfi1/fault.c
+++ b/drivers/infiniband/hw/hfi1/fault.c
@@ -203,7 +203,6 @@ static const struct file_operations __fault_opcodes_fops = {
.open = fault_opcodes_open,
.read = fault_opcodes_read,
.write = fault_opcodes_write,
- .llseek = no_llseek
};
void hfi1_fault_exit_debugfs(struct hfi1_ibdev *ibd)
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 253fea374a72..69999d8d24f3 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -2673,7 +2673,6 @@ static const struct file_operations devx_async_cmd_event_fops = {
.read = devx_async_cmd_event_read,
.poll = devx_async_cmd_event_poll,
.release = uverbs_uobject_fd_release,
- .llseek = no_llseek,
};
static ssize_t devx_async_event_read(struct file *filp, char __user *buf,
@@ -2788,7 +2787,6 @@ static const struct file_operations devx_async_event_fops = {
.read = devx_async_event_read,
.poll = devx_async_event_poll,
.release = uverbs_uobject_fd_release,
- .llseek = no_llseek,
};
static void devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj,
diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c
index eb4906552ac8..b5cbb57ee5f6 100644
--- a/drivers/input/evdev.c
+++ b/drivers/input/evdev.c
@@ -1299,7 +1299,6 @@ static const struct file_operations evdev_fops = {
.compat_ioctl = evdev_ioctl_compat,
#endif
.fasync = evdev_fasync,
- .llseek = no_llseek,
};
/*
diff --git a/drivers/input/joydev.c b/drivers/input/joydev.c
index 5824bca02e5a..ba2b17288bcd 100644
--- a/drivers/input/joydev.c
+++ b/drivers/input/joydev.c
@@ -718,7 +718,6 @@ static const struct file_operations joydev_fops = {
.compat_ioctl = joydev_compat_ioctl,
#endif
.fasync = joydev_fasync,
- .llseek = no_llseek,
};
/*
diff --git a/drivers/input/keyboard/applespi.c b/drivers/input/keyboard/applespi.c
index cf25177b4830..707c5a8ae736 100644
--- a/drivers/input/keyboard/applespi.c
+++ b/drivers/input/keyboard/applespi.c
@@ -1007,7 +1007,6 @@ static const struct file_operations applespi_tp_dim_fops = {
.owner = THIS_MODULE,
.open = applespi_tp_dim_open,
.read = applespi_tp_dim_read,
- .llseek = no_llseek,
};
static void report_finger_data(struct input_dev *input, int slot,
diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c
index 445856c9127a..2c51ea9d01d7 100644
--- a/drivers/input/misc/uinput.c
+++ b/drivers/input/misc/uinput.c
@@ -1132,7 +1132,6 @@ static const struct file_operations uinput_fops = {
#ifdef CONFIG_COMPAT
.compat_ioctl = uinput_compat_ioctl,
#endif
- .llseek = no_llseek,
};
static struct miscdevice uinput_misc = {
diff --git a/drivers/input/serio/userio.c b/drivers/input/serio/userio.c
index a88e2eee55c3..1ab12b247f98 100644
--- a/drivers/input/serio/userio.c
+++ b/drivers/input/serio/userio.c
@@ -267,7 +267,6 @@ static const struct file_operations userio_fops = {
.read = userio_char_read,
.write = userio_char_write,
.poll = userio_char_poll,
- .llseek = no_llseek,
};
static struct miscdevice userio_misc = {
diff --git a/drivers/iommu/iommufd/fault.c b/drivers/iommu/iommufd/fault.c
index 8c8226f0dffd..e590973ce5cf 100644
--- a/drivers/iommu/iommufd/fault.c
+++ b/drivers/iommu/iommufd/fault.c
@@ -360,7 +360,6 @@ static const struct file_operations iommufd_fault_fops = {
.write = iommufd_fault_fops_write,
.poll = iommufd_fault_fops_poll,
.release = iommufd_fault_fops_release,
- .llseek = no_llseek,
};
int iommufd_fault_alloc(struct iommufd_ucmd *ucmd)
diff --git a/drivers/isdn/capi/capi.c b/drivers/isdn/capi/capi.c
index 3ed257334562..70dee9ad4bae 100644
--- a/drivers/isdn/capi/capi.c
+++ b/drivers/isdn/capi/capi.c
@@ -1024,7 +1024,6 @@ static int capi_release(struct inode *inode, struct file *file)
static const struct file_operations capi_fops =
{
.owner = THIS_MODULE,
- .llseek = no_llseek,
.read = capi_read,
.write = capi_write,
.poll = capi_poll,
diff --git a/drivers/isdn/mISDN/timerdev.c b/drivers/isdn/mISDN/timerdev.c
index 83d6b484d3c6..7cfa8c61dba0 100644
--- a/drivers/isdn/mISDN/timerdev.c
+++ b/drivers/isdn/mISDN/timerdev.c
@@ -266,7 +266,6 @@ static const struct file_operations mISDN_fops = {
.unlocked_ioctl = mISDN_ioctl,
.open = mISDN_open,
.release = mISDN_close,
- .llseek = no_llseek,
};
static struct miscdevice mISDNtimer = {
diff --git a/drivers/leds/uleds.c b/drivers/leds/uleds.c
index 3d361c920030..374a841f18c3 100644
--- a/drivers/leds/uleds.c
+++ b/drivers/leds/uleds.c
@@ -200,7 +200,6 @@ static const struct file_operations uleds_fops = {
.read = uleds_read,
.write = uleds_write,
.poll = uleds_poll,
- .llseek = no_llseek,
};
static struct miscdevice uleds_misc = {
diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c
index b0407c5fadb2..88adee42ba82 100644
--- a/drivers/macintosh/adb.c
+++ b/drivers/macintosh/adb.c
@@ -842,7 +842,6 @@ out:
static const struct file_operations adb_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.read = adb_read,
.write = adb_write,
.open = adb_open,
diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c
index b2b78a53e532..a01bc5090cdf 100644
--- a/drivers/macintosh/smu.c
+++ b/drivers/macintosh/smu.c
@@ -1314,7 +1314,6 @@ static int smu_release(struct inode *inode, struct file *file)
static const struct file_operations smu_device_fops = {
- .llseek = no_llseek,
.read = smu_read,
.write = smu_write,
.poll = smu_fpoll,
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 098bf526136c..d478aafa02c9 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -529,9 +529,6 @@ static struct dm_buffer *list_to_buffer(struct list_head *l)
{
struct lru_entry *le = list_entry(l, struct lru_entry, list);
- if (!le)
- return NULL;
-
return le_to_buffer(le);
}
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 17f0fab1e254..aaeeabfab09b 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -1368,7 +1368,7 @@ static void mg_copy(struct work_struct *ws)
*/
bool rb = bio_detain_shared(mg->cache, mg->op->oblock, mg->overwrite_bio);
- BUG_ON(rb); /* An exclussive lock must _not_ be held for this block */
+ BUG_ON(rb); /* An exclusive lock must _not_ be held for this block */
mg->overwrite_bio = NULL;
inc_io_migrations(mg->cache);
mg_full_copy(ws);
@@ -3200,8 +3200,6 @@ static int parse_cblock_range(struct cache *cache, const char *str,
* Try and parse form (ii) first.
*/
r = sscanf(str, "%llu-%llu%c", &b, &e, &dummy);
- if (r < 0)
- return r;
if (r == 2) {
result->begin = to_cblock(b);
@@ -3213,8 +3211,6 @@ static int parse_cblock_range(struct cache *cache, const char *str,
* That didn't work, try form (i).
*/
r = sscanf(str, "%llu%c", &b, &dummy);
- if (r < 0)
- return r;
if (r == 1) {
result->begin = to_cblock(b);
diff --git a/drivers/md/dm-clone-metadata.c b/drivers/md/dm-clone-metadata.c
index 2db84cd2202b..14c5c28d938b 100644
--- a/drivers/md/dm-clone-metadata.c
+++ b/drivers/md/dm-clone-metadata.c
@@ -530,10 +530,7 @@ static int __load_bitset_in_core(struct dm_clone_metadata *cmd)
return r;
for (i = 0; ; i++) {
- if (dm_bitset_cursor_get_value(&c))
- __set_bit(i, cmd->region_map);
- else
- __clear_bit(i, cmd->region_map);
+ __assign_bit(i, cmd->region_map, dm_bitset_cursor_get_value(&c));
if (i >= (cmd->nr_regions - 1))
break;
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 348b4b26c272..5228b03b6fe0 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -147,6 +147,7 @@ enum cipher_flags {
CRYPT_MODE_INTEGRITY_AEAD, /* Use authenticated mode for cipher */
CRYPT_IV_LARGE_SECTORS, /* Calculate IV from sector_size, not 512B sectors */
CRYPT_ENCRYPT_PREPROCESS, /* Must preprocess data for encryption (elephant) */
+ CRYPT_KEY_MAC_SIZE_SET, /* The integrity_key_size option was used */
};
/*
@@ -2613,35 +2614,31 @@ static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string
key = request_key(type, key_desc + 1, NULL);
if (IS_ERR(key)) {
- kfree_sensitive(new_key_string);
- return PTR_ERR(key);
+ ret = PTR_ERR(key);
+ goto free_new_key_string;
}
down_read(&key->sem);
-
ret = set_key(cc, key);
- if (ret < 0) {
- up_read(&key->sem);
- key_put(key);
- kfree_sensitive(new_key_string);
- return ret;
- }
-
up_read(&key->sem);
key_put(key);
+ if (ret < 0)
+ goto free_new_key_string;
/* clear the flag since following operations may invalidate previously valid key */
clear_bit(DM_CRYPT_KEY_VALID, &cc->flags);
ret = crypt_setkey(cc);
+ if (ret)
+ goto free_new_key_string;
- if (!ret) {
- set_bit(DM_CRYPT_KEY_VALID, &cc->flags);
- kfree_sensitive(cc->key_string);
- cc->key_string = new_key_string;
- } else
- kfree_sensitive(new_key_string);
+ set_bit(DM_CRYPT_KEY_VALID, &cc->flags);
+ kfree_sensitive(cc->key_string);
+ cc->key_string = new_key_string;
+ return 0;
+free_new_key_string:
+ kfree_sensitive(new_key_string);
return ret;
}
@@ -2937,7 +2934,8 @@ static int crypt_ctr_auth_cipher(struct crypt_config *cc, char *cipher_api)
if (IS_ERR(mac))
return PTR_ERR(mac);
- cc->key_mac_size = crypto_ahash_digestsize(mac);
+ if (!test_bit(CRYPT_KEY_MAC_SIZE_SET, &cc->cipher_flags))
+ cc->key_mac_size = crypto_ahash_digestsize(mac);
crypto_free_ahash(mac);
cc->authenc_key = kmalloc(crypt_authenckey_size(cc), GFP_KERNEL);
@@ -3219,6 +3217,13 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar
cc->cipher_auth = kstrdup(sval, GFP_KERNEL);
if (!cc->cipher_auth)
return -ENOMEM;
+ } else if (sscanf(opt_string, "integrity_key_size:%u%c", &val, &dummy) == 1) {
+ if (!val) {
+ ti->error = "Invalid integrity_key_size argument";
+ return -EINVAL;
+ }
+ cc->key_mac_size = val;
+ set_bit(CRYPT_KEY_MAC_SIZE_SET, &cc->cipher_flags);
} else if (sscanf(opt_string, "sector_size:%hu%c", &cc->sector_size, &dummy) == 1) {
if (cc->sector_size < (1 << SECTOR_SHIFT) ||
cc->sector_size > 4096 ||
@@ -3607,10 +3612,10 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
num_feature_args += test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags);
num_feature_args += test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags);
num_feature_args += test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags);
+ num_feature_args += !!cc->used_tag_size;
num_feature_args += cc->sector_size != (1 << SECTOR_SHIFT);
num_feature_args += test_bit(CRYPT_IV_LARGE_SECTORS, &cc->cipher_flags);
- if (cc->used_tag_size)
- num_feature_args++;
+ num_feature_args += test_bit(CRYPT_KEY_MAC_SIZE_SET, &cc->cipher_flags);
if (num_feature_args) {
DMEMIT(" %d", num_feature_args);
if (ti->num_discard_bios)
@@ -3631,6 +3636,8 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
DMEMIT(" sector_size:%d", cc->sector_size);
if (test_bit(CRYPT_IV_LARGE_SECTORS, &cc->cipher_flags))
DMEMIT(" iv_large_sectors");
+ if (test_bit(CRYPT_KEY_MAC_SIZE_SET, &cc->cipher_flags))
+ DMEMIT(" integrity_key_size:%u", cc->key_mac_size);
}
break;
@@ -3758,7 +3765,7 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits)
static struct target_type crypt_target = {
.name = "crypt",
- .version = {1, 27, 0},
+ .version = {1, 28, 0},
.module = THIS_MODULE,
.ctr = crypt_ctr,
.dtr = crypt_dtr,
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index acff2f64f251..ee9f7cecd78e 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -284,6 +284,7 @@ struct dm_integrity_c {
mempool_t recheck_pool;
struct bio_set recheck_bios;
+ struct bio_set recalc_bios;
struct notifier_block reboot_notifier;
};
@@ -321,7 +322,9 @@ struct dm_integrity_io {
struct dm_bio_details bio_details;
char *integrity_payload;
+ unsigned payload_len;
bool integrity_payload_from_mempool;
+ bool integrity_range_locked;
};
struct journal_completion {
@@ -359,7 +362,7 @@ static struct kmem_cache *journal_io_cache;
#endif
static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map);
-static int dm_integrity_map_inline(struct dm_integrity_io *dio);
+static int dm_integrity_map_inline(struct dm_integrity_io *dio, bool from_map);
static void integrity_bio_wait(struct work_struct *w);
static void dm_integrity_dtr(struct dm_target *ti);
@@ -491,7 +494,8 @@ static int sb_mac(struct dm_integrity_c *ic, bool wr)
__u8 *sb = (__u8 *)ic->sb;
__u8 *mac = sb + (1 << SECTOR_SHIFT) - mac_size;
- if (sizeof(struct superblock) + mac_size > 1 << SECTOR_SHIFT) {
+ if (sizeof(struct superblock) + mac_size > 1 << SECTOR_SHIFT ||
+ mac_size > HASH_MAX_DIGESTSIZE) {
dm_integrity_io_error(ic, "digest is too long", -EINVAL);
return -EINVAL;
}
@@ -1500,15 +1504,15 @@ static void dm_integrity_flush_buffers(struct dm_integrity_c *ic, bool flush_dat
if (!ic->meta_dev)
flush_data = false;
if (flush_data) {
- fr.io_req.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC,
- fr.io_req.mem.type = DM_IO_KMEM,
- fr.io_req.mem.ptr.addr = NULL,
- fr.io_req.notify.fn = flush_notify,
+ fr.io_req.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC;
+ fr.io_req.mem.type = DM_IO_KMEM;
+ fr.io_req.mem.ptr.addr = NULL;
+ fr.io_req.notify.fn = flush_notify;
fr.io_req.notify.context = &fr;
- fr.io_req.client = dm_bufio_get_dm_io_client(ic->bufio),
- fr.io_reg.bdev = ic->dev->bdev,
- fr.io_reg.sector = 0,
- fr.io_reg.count = 0,
+ fr.io_req.client = dm_bufio_get_dm_io_client(ic->bufio);
+ fr.io_reg.bdev = ic->dev->bdev;
+ fr.io_reg.sector = 0;
+ fr.io_reg.count = 0;
fr.ic = ic;
init_completion(&fr.comp);
r = dm_io(&fr.io_req, 1, &fr.io_reg, NULL, IOPRIO_DEFAULT);
@@ -1946,8 +1950,13 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
dio->bi_status = 0;
dio->op = bio_op(bio);
- if (ic->mode == 'I')
- return dm_integrity_map_inline(dio);
+ if (ic->mode == 'I') {
+ bio->bi_iter.bi_sector = dm_target_offset(ic->ti, bio->bi_iter.bi_sector);
+ dio->integrity_payload = NULL;
+ dio->integrity_payload_from_mempool = false;
+ dio->integrity_range_locked = false;
+ return dm_integrity_map_inline(dio, true);
+ }
if (unlikely(dio->op == REQ_OP_DISCARD)) {
if (ti->max_io_len) {
@@ -2397,15 +2406,13 @@ journal_read_write:
do_endio_flush(ic, dio);
}
-static int dm_integrity_map_inline(struct dm_integrity_io *dio)
+static int dm_integrity_map_inline(struct dm_integrity_io *dio, bool from_map)
{
struct dm_integrity_c *ic = dio->ic;
struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
struct bio_integrity_payload *bip;
- unsigned payload_len, digest_size, extra_size, ret;
-
- dio->integrity_payload = NULL;
- dio->integrity_payload_from_mempool = false;
+ unsigned ret;
+ sector_t recalc_sector;
if (unlikely(bio_integrity(bio))) {
bio->bi_status = BLK_STS_NOTSUPP;
@@ -2418,28 +2425,67 @@ static int dm_integrity_map_inline(struct dm_integrity_io *dio)
return DM_MAPIO_REMAPPED;
retry:
- payload_len = ic->tuple_size * (bio_sectors(bio) >> ic->sb->log2_sectors_per_block);
- digest_size = crypto_shash_digestsize(ic->internal_hash);
- extra_size = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0;
- payload_len += extra_size;
- dio->integrity_payload = kmalloc(payload_len, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
- if (unlikely(!dio->integrity_payload)) {
- const unsigned x_size = PAGE_SIZE << 1;
- if (payload_len > x_size) {
- unsigned sectors = ((x_size - extra_size) / ic->tuple_size) << ic->sb->log2_sectors_per_block;
- if (WARN_ON(!sectors || sectors >= bio_sectors(bio))) {
- bio->bi_status = BLK_STS_NOTSUPP;
- bio_endio(bio);
- return DM_MAPIO_SUBMITTED;
+ if (!dio->integrity_payload) {
+ unsigned digest_size, extra_size;
+ dio->payload_len = ic->tuple_size * (bio_sectors(bio) >> ic->sb->log2_sectors_per_block);
+ digest_size = crypto_shash_digestsize(ic->internal_hash);
+ extra_size = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0;
+ dio->payload_len += extra_size;
+ dio->integrity_payload = kmalloc(dio->payload_len, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
+ if (unlikely(!dio->integrity_payload)) {
+ const unsigned x_size = PAGE_SIZE << 1;
+ if (dio->payload_len > x_size) {
+ unsigned sectors = ((x_size - extra_size) / ic->tuple_size) << ic->sb->log2_sectors_per_block;
+ if (WARN_ON(!sectors || sectors >= bio_sectors(bio))) {
+ bio->bi_status = BLK_STS_NOTSUPP;
+ bio_endio(bio);
+ return DM_MAPIO_SUBMITTED;
+ }
+ dm_accept_partial_bio(bio, sectors);
+ goto retry;
}
- dm_accept_partial_bio(bio, sectors);
- goto retry;
}
+ }
+
+ dio->range.logical_sector = bio->bi_iter.bi_sector;
+ dio->range.n_sectors = bio_sectors(bio);
+
+ if (!(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)))
+ goto skip_spinlock;
+#ifdef CONFIG_64BIT
+ /*
+ * On 64-bit CPUs we can optimize the lock away (so that it won't cause
+ * cache line bouncing) and use acquire/release barriers instead.
+ *
+ * Paired with smp_store_release in integrity_recalc_inline.
+ */
+ recalc_sector = le64_to_cpu(smp_load_acquire(&ic->sb->recalc_sector));
+ if (likely(dio->range.logical_sector + dio->range.n_sectors <= recalc_sector))
+ goto skip_spinlock;
+#endif
+ spin_lock_irq(&ic->endio_wait.lock);
+ recalc_sector = le64_to_cpu(ic->sb->recalc_sector);
+ if (dio->range.logical_sector + dio->range.n_sectors <= recalc_sector)
+ goto skip_unlock;
+ if (unlikely(!add_new_range(ic, &dio->range, true))) {
+ if (from_map) {
+ spin_unlock_irq(&ic->endio_wait.lock);
+ INIT_WORK(&dio->work, integrity_bio_wait);
+ queue_work(ic->wait_wq, &dio->work);
+ return DM_MAPIO_SUBMITTED;
+ }
+ wait_and_add_new_range(ic, &dio->range);
+ }
+ dio->integrity_range_locked = true;
+skip_unlock:
+ spin_unlock_irq(&ic->endio_wait.lock);
+skip_spinlock:
+
+ if (unlikely(!dio->integrity_payload)) {
dio->integrity_payload = page_to_virt((struct page *)mempool_alloc(&ic->recheck_pool, GFP_NOIO));
dio->integrity_payload_from_mempool = true;
}
- bio->bi_iter.bi_sector = dm_target_offset(ic->ti, bio->bi_iter.bi_sector);
dio->bio_details.bi_iter = bio->bi_iter;
if (unlikely(!dm_integrity_check_limits(ic, bio->bi_iter.bi_sector, bio))) {
@@ -2449,7 +2495,7 @@ retry:
bio->bi_iter.bi_sector += ic->start + SB_SECTORS;
bip = bio_integrity_alloc(bio, GFP_NOIO, 1);
- if (unlikely(IS_ERR(bip))) {
+ if (IS_ERR(bip)) {
bio->bi_status = errno_to_blk_status(PTR_ERR(bip));
bio_endio(bio);
return DM_MAPIO_SUBMITTED;
@@ -2470,8 +2516,8 @@ retry:
}
ret = bio_integrity_add_page(bio, virt_to_page(dio->integrity_payload),
- payload_len, offset_in_page(dio->integrity_payload));
- if (unlikely(ret != payload_len)) {
+ dio->payload_len, offset_in_page(dio->integrity_payload));
+ if (unlikely(ret != dio->payload_len)) {
bio->bi_status = BLK_STS_RESOURCE;
bio_endio(bio);
return DM_MAPIO_SUBMITTED;
@@ -2522,7 +2568,7 @@ static void dm_integrity_inline_recheck(struct work_struct *w)
}
bip = bio_integrity_alloc(outgoing_bio, GFP_NOIO, 1);
- if (unlikely(IS_ERR(bip))) {
+ if (IS_ERR(bip)) {
bio_put(outgoing_bio);
bio->bi_status = errno_to_blk_status(PTR_ERR(bip));
bio_endio(bio);
@@ -2579,6 +2625,9 @@ static int dm_integrity_end_io(struct dm_target *ti, struct bio *bio, blk_status
struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io));
if (dio->op == REQ_OP_READ && likely(*status == BLK_STS_OK)) {
unsigned pos = 0;
+ if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) &&
+ unlikely(dio->integrity_range_locked))
+ goto skip_check;
while (dio->bio_details.bi_iter.bi_size) {
char digest[HASH_MAX_DIGESTSIZE];
struct bio_vec bv = bio_iter_iovec(bio, dio->bio_details.bi_iter);
@@ -2598,9 +2647,10 @@ static int dm_integrity_end_io(struct dm_target *ti, struct bio *bio, blk_status
bio_advance_iter_single(bio, &dio->bio_details.bi_iter, ic->sectors_per_block << SECTOR_SHIFT);
}
}
- if (likely(dio->op == REQ_OP_READ) || likely(dio->op == REQ_OP_WRITE)) {
- dm_integrity_free_payload(dio);
- }
+skip_check:
+ dm_integrity_free_payload(dio);
+ if (unlikely(dio->integrity_range_locked))
+ remove_range(ic, &dio->range);
}
return DM_ENDIO_DONE;
}
@@ -2608,8 +2658,26 @@ static int dm_integrity_end_io(struct dm_target *ti, struct bio *bio, blk_status
static void integrity_bio_wait(struct work_struct *w)
{
struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work);
+ struct dm_integrity_c *ic = dio->ic;
- dm_integrity_map_continue(dio, false);
+ if (ic->mode == 'I') {
+ struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
+ int r = dm_integrity_map_inline(dio, false);
+ switch (r) {
+ case DM_MAPIO_KILL:
+ bio->bi_status = BLK_STS_IOERR;
+ fallthrough;
+ case DM_MAPIO_REMAPPED:
+ submit_bio_noacct(bio);
+ fallthrough;
+ case DM_MAPIO_SUBMITTED:
+ return;
+ default:
+ BUG();
+ }
+ } else {
+ dm_integrity_map_continue(dio, false);
+ }
}
static void pad_uncommitted(struct dm_integrity_c *ic)
@@ -3081,6 +3149,133 @@ free_ret:
kvfree(recalc_tags);
}
+static void integrity_recalc_inline(struct work_struct *w)
+{
+ struct dm_integrity_c *ic = container_of(w, struct dm_integrity_c, recalc_work);
+ size_t recalc_tags_size;
+ u8 *recalc_buffer = NULL;
+ u8 *recalc_tags = NULL;
+ struct dm_integrity_range range;
+ struct bio *bio;
+ struct bio_integrity_payload *bip;
+ __u8 *t;
+ unsigned int i;
+ int r;
+ unsigned ret;
+ unsigned int super_counter = 0;
+ unsigned recalc_sectors = RECALC_SECTORS;
+
+retry:
+ recalc_buffer = kmalloc(recalc_sectors << SECTOR_SHIFT, GFP_NOIO | __GFP_NOWARN);
+ if (!recalc_buffer) {
+oom:
+ recalc_sectors >>= 1;
+ if (recalc_sectors >= 1U << ic->sb->log2_sectors_per_block)
+ goto retry;
+ DMCRIT("out of memory for recalculate buffer - recalculation disabled");
+ goto free_ret;
+ }
+
+ recalc_tags_size = (recalc_sectors >> ic->sb->log2_sectors_per_block) * ic->tuple_size;
+ if (crypto_shash_digestsize(ic->internal_hash) > ic->tuple_size)
+ recalc_tags_size += crypto_shash_digestsize(ic->internal_hash) - ic->tuple_size;
+ recalc_tags = kmalloc(recalc_tags_size, GFP_NOIO | __GFP_NOWARN);
+ if (!recalc_tags) {
+ kfree(recalc_buffer);
+ recalc_buffer = NULL;
+ goto oom;
+ }
+
+ spin_lock_irq(&ic->endio_wait.lock);
+
+next_chunk:
+ if (unlikely(dm_post_suspending(ic->ti)))
+ goto unlock_ret;
+
+ range.logical_sector = le64_to_cpu(ic->sb->recalc_sector);
+ if (unlikely(range.logical_sector >= ic->provided_data_sectors))
+ goto unlock_ret;
+ range.n_sectors = min((sector_t)recalc_sectors, ic->provided_data_sectors - range.logical_sector);
+
+ add_new_range_and_wait(ic, &range);
+ spin_unlock_irq(&ic->endio_wait.lock);
+
+ if (unlikely(++super_counter == RECALC_WRITE_SUPER)) {
+ recalc_write_super(ic);
+ super_counter = 0;
+ }
+
+ if (unlikely(dm_integrity_failed(ic)))
+ goto err;
+
+ DEBUG_print("recalculating: %llx - %llx\n", range.logical_sector, range.n_sectors);
+
+ bio = bio_alloc_bioset(ic->dev->bdev, 1, REQ_OP_READ, GFP_NOIO, &ic->recalc_bios);
+ bio->bi_iter.bi_sector = ic->start + SB_SECTORS + range.logical_sector;
+ __bio_add_page(bio, virt_to_page(recalc_buffer), range.n_sectors << SECTOR_SHIFT, offset_in_page(recalc_buffer));
+ r = submit_bio_wait(bio);
+ bio_put(bio);
+ if (unlikely(r)) {
+ dm_integrity_io_error(ic, "reading data", r);
+ goto err;
+ }
+
+ t = recalc_tags;
+ for (i = 0; i < range.n_sectors; i += ic->sectors_per_block) {
+ memset(t, 0, ic->tuple_size);
+ integrity_sector_checksum(ic, range.logical_sector + i, recalc_buffer + (i << SECTOR_SHIFT), t);
+ t += ic->tuple_size;
+ }
+
+ bio = bio_alloc_bioset(ic->dev->bdev, 1, REQ_OP_WRITE, GFP_NOIO, &ic->recalc_bios);
+ bio->bi_iter.bi_sector = ic->start + SB_SECTORS + range.logical_sector;
+ __bio_add_page(bio, virt_to_page(recalc_buffer), range.n_sectors << SECTOR_SHIFT, offset_in_page(recalc_buffer));
+
+ bip = bio_integrity_alloc(bio, GFP_NOIO, 1);
+ if (unlikely(IS_ERR(bip))) {
+ bio_put(bio);
+ DMCRIT("out of memory for bio integrity payload - recalculation disabled");
+ goto err;
+ }
+ ret = bio_integrity_add_page(bio, virt_to_page(recalc_tags), t - recalc_tags, offset_in_page(recalc_tags));
+ if (unlikely(ret != t - recalc_tags)) {
+ bio_put(bio);
+ dm_integrity_io_error(ic, "attaching integrity tags", -ENOMEM);
+ goto err;
+ }
+
+ r = submit_bio_wait(bio);
+ bio_put(bio);
+ if (unlikely(r)) {
+ dm_integrity_io_error(ic, "writing data", r);
+ goto err;
+ }
+
+ cond_resched();
+ spin_lock_irq(&ic->endio_wait.lock);
+ remove_range_unlocked(ic, &range);
+#ifdef CONFIG_64BIT
+ /* Paired with smp_load_acquire in dm_integrity_map_inline. */
+ smp_store_release(&ic->sb->recalc_sector, cpu_to_le64(range.logical_sector + range.n_sectors));
+#else
+ ic->sb->recalc_sector = cpu_to_le64(range.logical_sector + range.n_sectors);
+#endif
+ goto next_chunk;
+
+err:
+ remove_range(ic, &range);
+ goto free_ret;
+
+unlock_ret:
+ spin_unlock_irq(&ic->endio_wait.lock);
+
+ recalc_write_super(ic);
+
+free_ret:
+ kfree(recalc_buffer);
+ kfree(recalc_tags);
+}
+
static void bitmap_block_work(struct work_struct *w)
{
struct bitmap_block_status *bbs = container_of(w, struct bitmap_block_status, work);
@@ -4619,6 +4814,17 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv
r = -ENOMEM;
goto bad;
}
+ r = bioset_init(&ic->recalc_bios, 1, 0, BIOSET_NEED_BVECS);
+ if (r) {
+ ti->error = "Cannot allocate bio set";
+ goto bad;
+ }
+ r = bioset_integrity_create(&ic->recalc_bios, 1);
+ if (r) {
+ ti->error = "Cannot allocate bio integrity set";
+ r = -ENOMEM;
+ goto bad;
+ }
}
ic->metadata_wq = alloc_workqueue("dm-integrity-metadata",
@@ -4717,13 +4923,18 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv
ti->error = "Block size doesn't match the information in superblock";
goto bad;
}
- if (!le32_to_cpu(ic->sb->journal_sections) != (ic->mode == 'I')) {
- r = -EINVAL;
- if (ic->mode != 'I')
+ if (ic->mode != 'I') {
+ if (!le32_to_cpu(ic->sb->journal_sections)) {
+ r = -EINVAL;
ti->error = "Corrupted superblock, journal_sections is 0";
- else
+ goto bad;
+ }
+ } else {
+ if (le32_to_cpu(ic->sb->journal_sections)) {
+ r = -EINVAL;
ti->error = "Corrupted superblock, journal_sections is not 0";
- goto bad;
+ goto bad;
+ }
}
/* make sure that ti->max_io_len doesn't overflow */
if (!ic->meta_dev) {
@@ -4830,7 +5041,7 @@ try_smaller_buffer:
r = -ENOMEM;
goto bad;
}
- INIT_WORK(&ic->recalc_work, integrity_recalc);
+ INIT_WORK(&ic->recalc_work, ic->mode == 'I' ? integrity_recalc_inline : integrity_recalc);
} else {
if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) {
ti->error = "Recalculate can only be specified with internal_hash";
@@ -4847,17 +5058,15 @@ try_smaller_buffer:
goto bad;
}
- if (ic->mode != 'I') {
- ic->bufio = dm_bufio_client_create(ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev,
- 1U << (SECTOR_SHIFT + ic->log2_buffer_sectors), 1, 0, NULL, NULL, 0);
- if (IS_ERR(ic->bufio)) {
- r = PTR_ERR(ic->bufio);
- ti->error = "Cannot initialize dm-bufio";
- ic->bufio = NULL;
- goto bad;
- }
- dm_bufio_set_sector_offset(ic->bufio, ic->start + ic->initial_sectors);
+ ic->bufio = dm_bufio_client_create(ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev,
+ 1U << (SECTOR_SHIFT + ic->log2_buffer_sectors), 1, 0, NULL, NULL, 0);
+ if (IS_ERR(ic->bufio)) {
+ r = PTR_ERR(ic->bufio);
+ ti->error = "Cannot initialize dm-bufio";
+ ic->bufio = NULL;
+ goto bad;
}
+ dm_bufio_set_sector_offset(ic->bufio, ic->start + ic->initial_sectors);
if (ic->mode != 'R' && ic->mode != 'I') {
r = create_journal(ic, &ti->error);
@@ -4979,6 +5188,7 @@ static void dm_integrity_dtr(struct dm_target *ti)
kvfree(ic->bbs);
if (ic->bufio)
dm_bufio_client_destroy(ic->bufio);
+ bioset_exit(&ic->recalc_bios);
bioset_exit(&ic->recheck_bios);
mempool_exit(&ic->recheck_pool);
mempool_exit(&ic->journal_io_mempool);
@@ -5033,7 +5243,7 @@ static void dm_integrity_dtr(struct dm_target *ti)
static struct target_type integrity_target = {
.name = "integrity",
- .version = {1, 12, 0},
+ .version = {1, 13, 0},
.module = THIS_MODULE,
.features = DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY,
.ctr = dm_integrity_ctr,
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 63682d27fc8d..1e0d3b9b75d6 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -2519,7 +2519,7 @@ static int super_validate(struct raid_set *rs, struct md_rdev *rdev)
rdev->saved_raid_disk = rdev->raid_disk;
}
- /* Reshape support -> restore repective data offsets */
+ /* Reshape support -> restore respective data offsets */
rdev->data_offset = le64_to_cpu(sb->data_offset);
rdev->new_data_offset = le64_to_cpu(sb->new_data_offset);
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index f7e9a3632eb3..499f8cc8a39f 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -496,8 +496,10 @@ static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
map = dm_get_live_table(md, &srcu_idx);
if (unlikely(!map)) {
+ DMERR_LIMIT("%s: mapping table unavailable, erroring io",
+ dm_device_name(md));
dm_put_live_table(md, srcu_idx);
- return BLK_STS_RESOURCE;
+ return BLK_STS_IOERR;
}
ti = dm_table_find_target(map, 0);
dm_put_live_table(md, srcu_idx);
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index a0c1620e90c8..89632ce97760 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -2948,7 +2948,7 @@ static struct pool *pool_create(struct mapped_device *pool_md,
pmd = dm_pool_metadata_open(metadata_dev, block_size, format_device);
if (IS_ERR(pmd)) {
*error = "Error creating metadata object";
- return (struct pool *)pmd;
+ return ERR_CAST(pmd);
}
pool = kzalloc(sizeof(*pool), GFP_KERNEL);
diff --git a/drivers/md/dm-vdo/data-vio.c b/drivers/md/dm-vdo/data-vio.c
index ab3ea8337809..0d502f6a86ad 100644
--- a/drivers/md/dm-vdo/data-vio.c
+++ b/drivers/md/dm-vdo/data-vio.c
@@ -501,6 +501,7 @@ static void launch_data_vio(struct data_vio *data_vio, logical_block_number_t lb
memset(&data_vio->record_name, 0, sizeof(data_vio->record_name));
memset(&data_vio->duplicate, 0, sizeof(data_vio->duplicate));
+ vdo_reset_completion(&data_vio->decrement_completion);
vdo_reset_completion(completion);
completion->error_handler = handle_data_vio_error;
set_data_vio_logical_callback(data_vio, attempt_logical_block_lock);
@@ -1273,12 +1274,14 @@ static void clean_hash_lock(struct vdo_completion *completion)
static void finish_cleanup(struct data_vio *data_vio)
{
struct vdo_completion *completion = &data_vio->vio.completion;
+ u32 discard_size = min_t(u32, data_vio->remaining_discard,
+ VDO_BLOCK_SIZE - data_vio->offset);
VDO_ASSERT_LOG_ONLY(data_vio->allocation.lock == NULL,
"complete data_vio has no allocation lock");
VDO_ASSERT_LOG_ONLY(data_vio->hash_lock == NULL,
"complete data_vio has no hash lock");
- if ((data_vio->remaining_discard <= VDO_BLOCK_SIZE) ||
+ if ((data_vio->remaining_discard <= discard_size) ||
(completion->result != VDO_SUCCESS)) {
struct data_vio_pool *pool = completion->vdo->data_vio_pool;
@@ -1287,12 +1290,12 @@ static void finish_cleanup(struct data_vio *data_vio)
return;
}
- data_vio->remaining_discard -= min_t(u32, data_vio->remaining_discard,
- VDO_BLOCK_SIZE - data_vio->offset);
+ data_vio->remaining_discard -= discard_size;
data_vio->is_partial = (data_vio->remaining_discard < VDO_BLOCK_SIZE);
data_vio->read = data_vio->is_partial;
data_vio->offset = 0;
completion->requeue = true;
+ data_vio->first_reference_operation_complete = false;
launch_data_vio(data_vio, data_vio->logical.lbn + 1);
}
@@ -1965,7 +1968,8 @@ static void allocate_block(struct vdo_completion *completion)
.state = VDO_MAPPING_STATE_UNCOMPRESSED,
};
- if (data_vio->fua) {
+ if (data_vio->fua ||
+ data_vio->remaining_discard > (u32) (VDO_BLOCK_SIZE - data_vio->offset)) {
prepare_for_dedupe(data_vio);
return;
}
@@ -2042,7 +2046,6 @@ void continue_data_vio_with_block_map_slot(struct vdo_completion *completion)
return;
}
-
/*
* We don't need to write any data, so skip allocation and just update the block map and
* reference counts (via the journal).
@@ -2051,7 +2054,7 @@ void continue_data_vio_with_block_map_slot(struct vdo_completion *completion)
if (data_vio->is_zero)
data_vio->new_mapped.state = VDO_MAPPING_STATE_UNCOMPRESSED;
- if (data_vio->remaining_discard > VDO_BLOCK_SIZE) {
+ if (data_vio->remaining_discard > (u32) (VDO_BLOCK_SIZE - data_vio->offset)) {
/* This is not the final block of a discard so we can't acknowledge it yet. */
update_metadata_for_data_vio_write(data_vio, NULL);
return;
diff --git a/drivers/md/dm-vdo/dedupe.c b/drivers/md/dm-vdo/dedupe.c
index 39ac68614419..80628ae93fba 100644
--- a/drivers/md/dm-vdo/dedupe.c
+++ b/drivers/md/dm-vdo/dedupe.c
@@ -729,6 +729,7 @@ static void process_update_result(struct data_vio *agent)
!change_context_state(context, DEDUPE_CONTEXT_COMPLETE, DEDUPE_CONTEXT_IDLE))
return;
+ agent->dedupe_context = NULL;
release_context(context);
}
@@ -1648,6 +1649,7 @@ static void process_query_result(struct data_vio *agent)
if (change_context_state(context, DEDUPE_CONTEXT_COMPLETE, DEDUPE_CONTEXT_IDLE)) {
agent->is_duplicate = decode_uds_advice(context);
+ agent->dedupe_context = NULL;
release_context(context);
}
}
@@ -2321,6 +2323,7 @@ static void timeout_index_operations_callback(struct vdo_completion *completion)
* send its requestor on its way.
*/
list_del_init(&context->list_entry);
+ context->requestor->dedupe_context = NULL;
continue_data_vio(context->requestor);
timed_out++;
}
diff --git a/drivers/md/dm-vdo/dm-vdo-target.c b/drivers/md/dm-vdo/dm-vdo-target.c
index dd05691e4097..0e04c2021682 100644
--- a/drivers/md/dm-vdo/dm-vdo-target.c
+++ b/drivers/md/dm-vdo/dm-vdo-target.c
@@ -1105,6 +1105,9 @@ static int vdo_message(struct dm_target *ti, unsigned int argc, char **argv,
if ((argc == 1) && (strcasecmp(argv[0], "stats") == 0)) {
vdo_write_stats(vdo, result_buffer, maxlen);
result = 1;
+ } else if ((argc == 1) && (strcasecmp(argv[0], "config") == 0)) {
+ vdo_write_config(vdo, &result_buffer, &maxlen);
+ result = 1;
} else {
result = vdo_status_to_errno(process_vdo_message(vdo, argc, argv));
}
@@ -2293,6 +2296,14 @@ static void handle_load_error(struct vdo_completion *completion)
return;
}
+ if ((completion->result == VDO_UNSUPPORTED_VERSION) &&
+ (vdo->admin.phase == LOAD_PHASE_MAKE_DIRTY)) {
+ vdo_log_error("Aborting load due to unsupported version");
+ vdo->admin.phase = LOAD_PHASE_FINISHED;
+ load_callback(completion);
+ return;
+ }
+
vdo_log_error_strerror(completion->result,
"Entering read-only mode due to load error");
vdo->admin.phase = LOAD_PHASE_WAIT_FOR_READ_ONLY;
@@ -2737,6 +2748,19 @@ static int vdo_preresume_registered(struct dm_target *ti, struct vdo *vdo)
vdo_log_info("starting device '%s'", device_name);
result = perform_admin_operation(vdo, LOAD_PHASE_START, load_callback,
handle_load_error, "load");
+ if (result == VDO_UNSUPPORTED_VERSION) {
+ /*
+ * A component version is not supported. This can happen when the
+ * recovery journal metadata is in an old version format. Abort the
+ * load without saving the state.
+ */
+ vdo->suspend_type = VDO_ADMIN_STATE_SUSPENDING;
+ perform_admin_operation(vdo, SUSPEND_PHASE_START,
+ suspend_callback, suspend_callback,
+ "suspend");
+ return result;
+ }
+
if ((result != VDO_SUCCESS) && (result != VDO_READ_ONLY)) {
/*
* Something has gone very wrong. Make sure everything has drained and
@@ -2808,7 +2832,8 @@ static int vdo_preresume(struct dm_target *ti)
vdo_register_thread_device_id(&instance_thread, &vdo->instance);
result = vdo_preresume_registered(ti, vdo);
- if ((result == VDO_PARAMETER_MISMATCH) || (result == VDO_INVALID_ADMIN_STATE))
+ if ((result == VDO_PARAMETER_MISMATCH) || (result == VDO_INVALID_ADMIN_STATE) ||
+ (result == VDO_UNSUPPORTED_VERSION))
result = -EINVAL;
vdo_unregister_thread_device_id();
return vdo_status_to_errno(result);
@@ -2832,7 +2857,7 @@ static void vdo_resume(struct dm_target *ti)
static struct target_type vdo_target_bio = {
.features = DM_TARGET_SINGLETON,
.name = "vdo",
- .version = { 9, 0, 0 },
+ .version = { 9, 1, 0 },
.module = THIS_MODULE,
.ctr = vdo_ctr,
.dtr = vdo_dtr,
diff --git a/drivers/md/dm-vdo/indexer/chapter-index.c b/drivers/md/dm-vdo/indexer/chapter-index.c
index 7e32a25d3f2f..fb1db41c794b 100644
--- a/drivers/md/dm-vdo/indexer/chapter-index.c
+++ b/drivers/md/dm-vdo/indexer/chapter-index.c
@@ -177,7 +177,7 @@ int uds_pack_open_chapter_index_page(struct open_chapter_index *chapter_index,
if (list_number < 0)
return UDS_OVERFLOW;
- next_list = first_list + list_number--,
+ next_list = first_list + list_number--;
result = uds_start_delta_index_search(delta_index, next_list, 0,
&entry);
if (result != UDS_SUCCESS)
diff --git a/drivers/md/dm-vdo/io-submitter.c b/drivers/md/dm-vdo/io-submitter.c
index 9a3716bb3c05..ab62abe18827 100644
--- a/drivers/md/dm-vdo/io-submitter.c
+++ b/drivers/md/dm-vdo/io-submitter.c
@@ -346,7 +346,6 @@ void __submit_metadata_vio(struct vio *vio, physical_block_number_t physical,
VDO_ASSERT_LOG_ONLY(!code->quiescent, "I/O not allowed in state %s", code->name);
- VDO_ASSERT_LOG_ONLY(vio->bio->bi_next == NULL, "metadata bio has no next bio");
vdo_reset_completion(completion);
completion->error_handler = error_handler;
diff --git a/drivers/md/dm-vdo/message-stats.c b/drivers/md/dm-vdo/message-stats.c
index 2802cf92922b..75dfcd7c5f63 100644
--- a/drivers/md/dm-vdo/message-stats.c
+++ b/drivers/md/dm-vdo/message-stats.c
@@ -4,6 +4,7 @@
*/
#include "dedupe.h"
+#include "indexer.h"
#include "logger.h"
#include "memory-alloc.h"
#include "message-stats.h"
@@ -430,3 +431,50 @@ int vdo_write_stats(struct vdo *vdo, char *buf, unsigned int maxlen)
vdo_free(stats);
return VDO_SUCCESS;
}
+
+static void write_index_memory(u32 mem, char **buf, unsigned int *maxlen)
+{
+ char *prefix = "memorySize : ";
+
+ /* Convert index memory to fractional value */
+ if (mem == (u32)UDS_MEMORY_CONFIG_256MB)
+ write_string(prefix, "0.25, ", NULL, buf, maxlen);
+ else if (mem == (u32)UDS_MEMORY_CONFIG_512MB)
+ write_string(prefix, "0.50, ", NULL, buf, maxlen);
+ else if (mem == (u32)UDS_MEMORY_CONFIG_768MB)
+ write_string(prefix, "0.75, ", NULL, buf, maxlen);
+ else
+ write_u32(prefix, mem, ", ", buf, maxlen);
+}
+
+static void write_index_config(struct index_config *config, char **buf,
+ unsigned int *maxlen)
+{
+ write_string("index : ", "{ ", NULL, buf, maxlen);
+ /* index mem size */
+ write_index_memory(config->mem, buf, maxlen);
+ /* whether the index is sparse or not */
+ write_bool("isSparse : ", config->sparse, ", ", buf, maxlen);
+ write_string(NULL, "}", ", ", buf, maxlen);
+}
+
+int vdo_write_config(struct vdo *vdo, char **buf, unsigned int *maxlen)
+{
+ struct vdo_config *config = &vdo->states.vdo.config;
+
+ write_string(NULL, "{ ", NULL, buf, maxlen);
+ /* version */
+ write_u32("version : ", 1, ", ", buf, maxlen);
+ /* physical size */
+ write_block_count_t("physicalSize : ", config->physical_blocks * VDO_BLOCK_SIZE, ", ",
+ buf, maxlen);
+ /* logical size */
+ write_block_count_t("logicalSize : ", config->logical_blocks * VDO_BLOCK_SIZE, ", ",
+ buf, maxlen);
+ /* slab size */
+ write_block_count_t("slabSize : ", config->slab_size, ", ", buf, maxlen);
+ /* index config */
+ write_index_config(&vdo->geometry.index_config, buf, maxlen);
+ write_string(NULL, "}", NULL, buf, maxlen);
+ return VDO_SUCCESS;
+}
diff --git a/drivers/md/dm-vdo/message-stats.h b/drivers/md/dm-vdo/message-stats.h
index f7fceca9acab..f9c95eff569d 100644
--- a/drivers/md/dm-vdo/message-stats.h
+++ b/drivers/md/dm-vdo/message-stats.h
@@ -8,6 +8,7 @@
#include "types.h"
+int vdo_write_config(struct vdo *vdo, char **buf, unsigned int *maxlen);
int vdo_write_stats(struct vdo *vdo, char *buf, unsigned int maxlen);
#endif /* VDO_MESSAGE_STATS_H */
diff --git a/drivers/md/dm-vdo/repair.c b/drivers/md/dm-vdo/repair.c
index 7e0009d2f67d..ffff2c999518 100644
--- a/drivers/md/dm-vdo/repair.c
+++ b/drivers/md/dm-vdo/repair.c
@@ -1202,17 +1202,14 @@ static bool __must_check is_valid_recovery_journal_block(const struct recovery_j
* @journal: The journal to use.
* @header: The unpacked block header to check.
* @sequence: The expected sequence number.
- * @type: The expected metadata type.
*
* Return: True if the block matches.
*/
static bool __must_check is_exact_recovery_journal_block(const struct recovery_journal *journal,
const struct recovery_block_header *header,
- sequence_number_t sequence,
- enum vdo_metadata_type type)
+ sequence_number_t sequence)
{
- return ((header->metadata_type == type) &&
- (header->sequence_number == sequence) &&
+ return ((header->sequence_number == sequence) &&
(is_valid_recovery_journal_block(journal, header, true)));
}
@@ -1371,7 +1368,8 @@ static void extract_entries_from_block(struct repair_completion *repair,
get_recovery_journal_block_header(journal, repair->journal_data,
sequence);
- if (!is_exact_recovery_journal_block(journal, &header, sequence, format)) {
+ if (!is_exact_recovery_journal_block(journal, &header, sequence) ||
+ (header.metadata_type != format)) {
/* This block is invalid, so skip it. */
return;
}
@@ -1557,10 +1555,13 @@ static int parse_journal_for_recovery(struct repair_completion *repair)
sequence_number_t i, head;
bool found_entries = false;
struct recovery_journal *journal = repair->completion.vdo->recovery_journal;
+ struct recovery_block_header header;
+ enum vdo_metadata_type expected_format;
head = min(repair->block_map_head, repair->slab_journal_head);
+ header = get_recovery_journal_block_header(journal, repair->journal_data, head);
+ expected_format = header.metadata_type;
for (i = head; i <= repair->highest_tail; i++) {
- struct recovery_block_header header;
journal_entry_count_t block_entries;
u8 j;
@@ -1572,19 +1573,15 @@ static int parse_journal_for_recovery(struct repair_completion *repair)
};
header = get_recovery_journal_block_header(journal, repair->journal_data, i);
- if (header.metadata_type == VDO_METADATA_RECOVERY_JOURNAL) {
- /* This is an old format block, so we need to upgrade */
- vdo_log_error_strerror(VDO_UNSUPPORTED_VERSION,
- "Recovery journal is in the old format, a read-only rebuild is required.");
- vdo_enter_read_only_mode(repair->completion.vdo,
- VDO_UNSUPPORTED_VERSION);
- return VDO_UNSUPPORTED_VERSION;
- }
-
- if (!is_exact_recovery_journal_block(journal, &header, i,
- VDO_METADATA_RECOVERY_JOURNAL_2)) {
+ if (!is_exact_recovery_journal_block(journal, &header, i)) {
/* A bad block header was found so this must be the end of the journal. */
break;
+ } else if (header.metadata_type != expected_format) {
+ /* There is a mix of old and new format blocks, so we need to rebuild. */
+ vdo_log_error_strerror(VDO_CORRUPT_JOURNAL,
+ "Recovery journal is in an invalid format, a read-only rebuild is required.");
+ vdo_enter_read_only_mode(repair->completion.vdo, VDO_CORRUPT_JOURNAL);
+ return VDO_CORRUPT_JOURNAL;
}
block_entries = header.entry_count;
@@ -1620,8 +1617,14 @@ static int parse_journal_for_recovery(struct repair_completion *repair)
break;
}
- if (!found_entries)
+ if (!found_entries) {
return validate_heads(repair);
+ } else if (expected_format == VDO_METADATA_RECOVERY_JOURNAL) {
+ /* All journal blocks have the old format, so we need to upgrade. */
+ vdo_log_error_strerror(VDO_UNSUPPORTED_VERSION,
+ "Recovery journal is in the old format. Downgrade and complete recovery, then upgrade with a clean volume");
+ return VDO_UNSUPPORTED_VERSION;
+ }
/* Set the tail to the last valid tail block, if there is one. */
if (repair->tail_recovery_point.sector_count == 0)
diff --git a/drivers/md/dm-vdo/status-codes.c b/drivers/md/dm-vdo/status-codes.c
index d3493450b169..dd252d660b6d 100644
--- a/drivers/md/dm-vdo/status-codes.c
+++ b/drivers/md/dm-vdo/status-codes.c
@@ -28,7 +28,7 @@ const struct error_info vdo_status_list[] = {
{ "VDO_LOCK_ERROR", "A lock is held incorrectly" },
{ "VDO_READ_ONLY", "The device is in read-only mode" },
{ "VDO_SHUTTING_DOWN", "The device is shutting down" },
- { "VDO_CORRUPT_JOURNAL", "Recovery journal entries corrupted" },
+ { "VDO_CORRUPT_JOURNAL", "Recovery journal corrupted" },
{ "VDO_TOO_MANY_SLABS", "Exceeds maximum number of slabs supported" },
{ "VDO_INVALID_FRAGMENT", "Compressed block fragment is invalid" },
{ "VDO_RETRY_AFTER_REBUILD", "Retry operation after rebuilding finishes" },
diff --git a/drivers/md/dm-vdo/status-codes.h b/drivers/md/dm-vdo/status-codes.h
index 72da04159f88..426dc8e2ca5d 100644
--- a/drivers/md/dm-vdo/status-codes.h
+++ b/drivers/md/dm-vdo/status-codes.h
@@ -52,7 +52,7 @@ enum vdo_status_codes {
VDO_READ_ONLY,
/* the VDO is shutting down */
VDO_SHUTTING_DOWN,
- /* the recovery journal has corrupt entries */
+ /* the recovery journal has corrupt entries or corrupt metadata */
VDO_CORRUPT_JOURNAL,
/* exceeds maximum number of slabs supported */
VDO_TOO_MANY_SLABS,
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index 24ba9a10444c..36e4ddfe2d15 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -273,8 +273,10 @@ out:
if (v->mode == DM_VERITY_MODE_LOGGING)
return 0;
- if (v->mode == DM_VERITY_MODE_RESTART)
- kernel_restart("dm-verity device corrupted");
+ if (v->mode == DM_VERITY_MODE_RESTART) {
+ pr_emerg("dm-verity device corrupted\n");
+ emergency_restart();
+ }
if (v->mode == DM_VERITY_MODE_PANIC)
panic("dm-verity device corrupted");
@@ -597,6 +599,23 @@ static void verity_finish_io(struct dm_verity_io *io, blk_status_t status)
if (!static_branch_unlikely(&use_bh_wq_enabled) || !io->in_bh)
verity_fec_finish_io(io);
+ if (unlikely(status != BLK_STS_OK) &&
+ unlikely(!(bio->bi_opf & REQ_RAHEAD)) &&
+ !verity_is_system_shutting_down()) {
+ if (v->mode == DM_VERITY_MODE_RESTART ||
+ v->mode == DM_VERITY_MODE_PANIC)
+ DMERR_LIMIT("%s has error: %s", v->data_dev->name,
+ blk_status_to_str(status));
+
+ if (v->mode == DM_VERITY_MODE_RESTART) {
+ pr_emerg("dm-verity device corrupted\n");
+ emergency_restart();
+ }
+
+ if (v->mode == DM_VERITY_MODE_PANIC)
+ panic("dm-verity device corrupted");
+ }
+
bio_endio(bio);
}
diff --git a/drivers/md/dm-verity-verify-sig.c b/drivers/md/dm-verity-verify-sig.c
index d351d7d39c60..a9e2c6c0a33c 100644
--- a/drivers/md/dm-verity-verify-sig.c
+++ b/drivers/md/dm-verity-verify-sig.c
@@ -127,7 +127,7 @@ int verity_verify_root_hash(const void *root_hash, size_t root_hash_len,
#endif
VERIFYING_UNSPECIFIED_SIGNATURE, NULL, NULL);
#ifdef CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG_PLATFORM_KEYRING
- if (ret == -ENOKEY)
+ if (ret == -ENOKEY || ret == -EKEYREJECTED)
ret = verify_pkcs7_signature(root_hash, root_hash_len, sig_data,
sig_len,
VERIFY_USE_PLATFORM_KEYRING,
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 87bb90303435..ff4a6b570b76 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -2030,10 +2030,15 @@ static void dm_submit_bio(struct bio *bio)
struct dm_table *map;
map = dm_get_live_table(md, &srcu_idx);
+ if (unlikely(!map)) {
+ DMERR_LIMIT("%s: mapping table unavailable, erroring io",
+ dm_device_name(md));
+ bio_io_error(bio);
+ goto out;
+ }
- /* If suspended, or map not yet available, queue this IO for later */
- if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) ||
- unlikely(!map)) {
+ /* If suspended, queue this IO for later */
+ if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) {
if (bio->bi_opf & REQ_NOWAIT)
bio_wouldblock_error(bio);
else if (bio->bi_opf & REQ_RAHEAD)
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index cc466ad5cb1d..8ad782249af8 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -109,7 +109,6 @@ void dm_zone_endio(struct dm_io *io, struct bio *clone);
int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data);
bool dm_is_zone_write(struct mapped_device *md, struct bio *bio);
-int dm_zone_map_bio(struct dm_target_io *io);
int dm_zone_get_reset_bitmap(struct mapped_device *md, struct dm_table *t,
sector_t sector, unsigned int nr_zones,
unsigned long *need_reset);
@@ -119,10 +118,6 @@ static inline bool dm_is_zone_write(struct mapped_device *md, struct bio *bio)
{
return false;
}
-static inline int dm_zone_map_bio(struct dm_target_io *tio)
-{
- return DM_MAPIO_KILL;
-}
#endif
/*
diff --git a/drivers/media/cec/core/cec-api.c b/drivers/media/cec/core/cec-api.c
index c75a4057f00e..c50299246fc4 100644
--- a/drivers/media/cec/core/cec-api.c
+++ b/drivers/media/cec/core/cec-api.c
@@ -698,5 +698,4 @@ const struct file_operations cec_devnode_fops = {
.compat_ioctl = cec_ioctl,
.release = cec_release,
.poll = cec_poll,
- .llseek = no_llseek,
};
diff --git a/drivers/media/mc/mc-devnode.c b/drivers/media/mc/mc-devnode.c
index 318e267e798e..56444edaf136 100644
--- a/drivers/media/mc/mc-devnode.c
+++ b/drivers/media/mc/mc-devnode.c
@@ -204,7 +204,6 @@ static const struct file_operations media_devnode_fops = {
#endif /* CONFIG_COMPAT */
.release = media_release,
.poll = media_poll,
- .llseek = no_llseek,
};
int __must_check media_devnode_register(struct media_device *mdev,
diff --git a/drivers/media/rc/lirc_dev.c b/drivers/media/rc/lirc_dev.c
index b8dfd530fab7..f042f3f14afa 100644
--- a/drivers/media/rc/lirc_dev.c
+++ b/drivers/media/rc/lirc_dev.c
@@ -706,7 +706,6 @@ static const struct file_operations lirc_fops = {
.poll = lirc_poll,
.open = lirc_open,
.release = lirc_close,
- .llseek = no_llseek,
};
static void lirc_release_device(struct device *ld)
diff --git a/drivers/media/usb/uvc/uvc_debugfs.c b/drivers/media/usb/uvc/uvc_debugfs.c
index 1a1258d4ffca..14fa41cb8148 100644
--- a/drivers/media/usb/uvc/uvc_debugfs.c
+++ b/drivers/media/usb/uvc/uvc_debugfs.c
@@ -59,7 +59,6 @@ static int uvc_debugfs_stats_release(struct inode *inode, struct file *file)
static const struct file_operations uvc_debugfs_stats_fops = {
.owner = THIS_MODULE,
.open = uvc_debugfs_stats_open,
- .llseek = no_llseek,
.read = uvc_debugfs_stats_read,
.release = uvc_debugfs_stats_release,
};
diff --git a/drivers/media/v4l2-core/v4l2-dev.c b/drivers/media/v4l2-core/v4l2-dev.c
index 570ba00e00b3..3d7711cc42bc 100644
--- a/drivers/media/v4l2-core/v4l2-dev.c
+++ b/drivers/media/v4l2-core/v4l2-dev.c
@@ -483,7 +483,6 @@ static const struct file_operations v4l2_fops = {
#endif
.release = v4l2_release,
.poll = v4l2_poll,
- .llseek = no_llseek,
};
/**
diff --git a/drivers/message/fusion/mptctl.c b/drivers/message/fusion/mptctl.c
index 9f3999750c23..77fa55df70d0 100644
--- a/drivers/message/fusion/mptctl.c
+++ b/drivers/message/fusion/mptctl.c
@@ -1609,7 +1609,7 @@ mptctl_eventreport (MPT_ADAPTER *ioc, unsigned long arg)
maxEvents = numBytes/sizeof(MPT_IOCTL_EVENTS);
- max = MPTCTL_EVENT_LOG_SIZE < maxEvents ? MPTCTL_EVENT_LOG_SIZE : maxEvents;
+ max = min(maxEvents, MPTCTL_EVENT_LOG_SIZE);
/* If fewer than 1 event is requested, there must have
* been some type of error.
@@ -2691,7 +2691,6 @@ mptctl_hp_targetinfo(MPT_ADAPTER *ioc, unsigned long arg)
static const struct file_operations mptctl_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.fasync = mptctl_fasync,
.unlocked_ioctl = mptctl_ioctl,
#ifdef CONFIG_COMPAT
diff --git a/drivers/misc/lis3lv02d/lis3lv02d.c b/drivers/misc/lis3lv02d/lis3lv02d.c
index 1fc635a27568..4233dc4cc7d6 100644
--- a/drivers/misc/lis3lv02d/lis3lv02d.c
+++ b/drivers/misc/lis3lv02d/lis3lv02d.c
@@ -669,7 +669,6 @@ static int lis3lv02d_misc_fasync(int fd, struct file *file, int on)
static const struct file_operations lis3lv02d_misc_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.read = lis3lv02d_misc_read,
.open = lis3lv02d_misc_open,
.release = lis3lv02d_misc_release,
diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c
index 40c3fe26f76d..1f5aaf16e300 100644
--- a/drivers/misc/mei/main.c
+++ b/drivers/misc/mei/main.c
@@ -1176,7 +1176,6 @@ static const struct file_operations mei_fops = {
.poll = mei_poll,
.fsync = mei_fsync,
.fasync = mei_fasync,
- .llseek = no_llseek
};
/**
diff --git a/drivers/misc/ntsync.c b/drivers/misc/ntsync.c
index 3c2f743c58b0..4954553b7baa 100644
--- a/drivers/misc/ntsync.c
+++ b/drivers/misc/ntsync.c
@@ -126,7 +126,6 @@ static const struct file_operations ntsync_obj_fops = {
.release = ntsync_obj_release,
.unlocked_ioctl = ntsync_obj_ioctl,
.compat_ioctl = compat_ptr_ioctl,
- .llseek = no_llseek,
};
static struct ntsync_obj *ntsync_alloc_obj(struct ntsync_device *dev,
@@ -233,7 +232,6 @@ static const struct file_operations ntsync_fops = {
.release = ntsync_char_release,
.unlocked_ioctl = ntsync_char_ioctl,
.compat_ioctl = compat_ptr_ioctl,
- .llseek = no_llseek,
};
static struct miscdevice ntsync_misc = {
diff --git a/drivers/misc/phantom.c b/drivers/misc/phantom.c
index 30bd7c39c261..701db2c5859b 100644
--- a/drivers/misc/phantom.c
+++ b/drivers/misc/phantom.c
@@ -279,7 +279,6 @@ static const struct file_operations phantom_file_ops = {
.unlocked_ioctl = phantom_ioctl,
.compat_ioctl = phantom_compat_ioctl,
.poll = phantom_poll,
- .llseek = no_llseek,
};
static irqreturn_t phantom_isr(int irq, void *data)
diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index f58bea534004..ef06a4d5d65b 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -2734,7 +2734,6 @@ static const struct file_operations mmc_rpmb_fileops = {
.release = mmc_rpmb_chrdev_release,
.open = mmc_rpmb_chrdev_open,
.owner = THIS_MODULE,
- .llseek = no_llseek,
.unlocked_ioctl = mmc_rpmb_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = mmc_rpmb_ioctl_compat,
diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c
index 0d8f04cf03c5..6bb80d7714bc 100644
--- a/drivers/mtd/ubi/cdev.c
+++ b/drivers/mtd/ubi/cdev.c
@@ -1095,7 +1095,6 @@ const struct file_operations ubi_vol_cdev_operations = {
/* UBI character device operations */
const struct file_operations ubi_cdev_operations = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.unlocked_ioctl = ubi_cdev_ioctl,
.compat_ioctl = compat_ptr_ioctl,
};
@@ -1105,5 +1104,4 @@ const struct file_operations ubi_ctrl_cdev_operations = {
.owner = THIS_MODULE,
.unlocked_ioctl = ctrl_cdev_ioctl,
.compat_ioctl = compat_ptr_ioctl,
- .llseek = no_llseek,
};
diff --git a/drivers/mtd/ubi/debug.c b/drivers/mtd/ubi/debug.c
index 9ec3b8b6a0aa..d2a53961d8e2 100644
--- a/drivers/mtd/ubi/debug.c
+++ b/drivers/mtd/ubi/debug.c
@@ -470,7 +470,6 @@ static const struct file_operations dfs_fops = {
.read = dfs_file_read,
.write = dfs_file_write,
.open = simple_open,
- .llseek = no_llseek,
.owner = THIS_MODULE,
};
diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c
index a1f91ff8ec56..41e80f78b316 100644
--- a/drivers/net/netdevsim/fib.c
+++ b/drivers/net/netdevsim/fib.c
@@ -1414,7 +1414,6 @@ out:
static const struct file_operations nsim_nexthop_bucket_activity_fops = {
.open = simple_open,
.write = nsim_nexthop_bucket_activity_write,
- .llseek = no_llseek,
.owner = THIS_MODULE,
};
diff --git a/drivers/net/tap.c b/drivers/net/tap.c
index 77574f7a3bd4..5aa41d5f7765 100644
--- a/drivers/net/tap.c
+++ b/drivers/net/tap.c
@@ -1162,7 +1162,6 @@ static const struct file_operations tap_fops = {
.read_iter = tap_read_iter,
.write_iter = tap_write_iter,
.poll = tap_poll,
- .llseek = no_llseek,
.unlocked_ioctl = tap_ioctl,
.compat_ioctl = compat_ptr_ioctl,
};
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 5f77faef0ff1..9a0f6eb32016 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -3543,7 +3543,6 @@ static void tun_chr_show_fdinfo(struct seq_file *m, struct file *file)
static const struct file_operations tun_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.read_iter = tun_chr_read_iter,
.write_iter = tun_chr_write_iter,
.poll = tun_chr_poll,
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
index df53dd1d7e74..da72fd2d541f 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
@@ -1184,7 +1184,6 @@ static ssize_t bus_reset_write(struct file *file, const char __user *user_buf,
static const struct file_operations bus_reset_fops = {
.open = simple_open,
- .llseek = no_llseek,
.write = bus_reset_write,
};
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
index 99a541d442bb..49a6aff42376 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
@@ -3768,7 +3768,6 @@ static int iwl_mvm_d3_test_release(struct inode *inode, struct file *file)
}
const struct file_operations iwl_dbgfs_d3_test_ops = {
- .llseek = no_llseek,
.open = iwl_mvm_d3_test_open,
.read = iwl_mvm_d3_test_read,
.release = iwl_mvm_d3_test_release,
diff --git a/drivers/platform/chrome/cros_ec_debugfs.c b/drivers/platform/chrome/cros_ec_debugfs.c
index 4525ad1b59f4..839154c46e46 100644
--- a/drivers/platform/chrome/cros_ec_debugfs.c
+++ b/drivers/platform/chrome/cros_ec_debugfs.c
@@ -302,7 +302,6 @@ static const struct file_operations cros_ec_console_log_fops = {
.owner = THIS_MODULE,
.open = cros_ec_console_log_open,
.read = cros_ec_console_log_read,
- .llseek = no_llseek,
.poll = cros_ec_console_log_poll,
.release = cros_ec_console_log_release,
};
diff --git a/drivers/platform/chrome/wilco_ec/debugfs.c b/drivers/platform/chrome/wilco_ec/debugfs.c
index 983f2fa44ba5..99486086af6a 100644
--- a/drivers/platform/chrome/wilco_ec/debugfs.c
+++ b/drivers/platform/chrome/wilco_ec/debugfs.c
@@ -156,7 +156,6 @@ static const struct file_operations fops_raw = {
.owner = THIS_MODULE,
.read = raw_read,
.write = raw_write,
- .llseek = no_llseek,
};
#define CMD_KB_CHROME 0x88
diff --git a/drivers/platform/chrome/wilco_ec/event.c b/drivers/platform/chrome/wilco_ec/event.c
index bd1fb53ba028..196e46a1d489 100644
--- a/drivers/platform/chrome/wilco_ec/event.c
+++ b/drivers/platform/chrome/wilco_ec/event.c
@@ -403,7 +403,6 @@ static const struct file_operations event_fops = {
.poll = event_poll,
.read = event_read,
.release = event_release,
- .llseek = no_llseek,
.owner = THIS_MODULE,
};
diff --git a/drivers/platform/chrome/wilco_ec/telemetry.c b/drivers/platform/chrome/wilco_ec/telemetry.c
index 21d4cbbb009a..a87877e4300a 100644
--- a/drivers/platform/chrome/wilco_ec/telemetry.c
+++ b/drivers/platform/chrome/wilco_ec/telemetry.c
@@ -330,7 +330,6 @@ static const struct file_operations telem_fops = {
.write = telem_write,
.read = telem_read,
.release = telem_release,
- .llseek = no_llseek,
.owner = THIS_MODULE,
};
diff --git a/drivers/platform/surface/surface_aggregator_cdev.c b/drivers/platform/surface/surface_aggregator_cdev.c
index 07e065b9159f..165b1416230d 100644
--- a/drivers/platform/surface/surface_aggregator_cdev.c
+++ b/drivers/platform/surface/surface_aggregator_cdev.c
@@ -670,7 +670,6 @@ static const struct file_operations ssam_controller_fops = {
.fasync = ssam_cdev_fasync,
.unlocked_ioctl = ssam_cdev_device_ioctl,
.compat_ioctl = ssam_cdev_device_ioctl,
- .llseek = no_llseek,
};
diff --git a/drivers/platform/surface/surface_dtx.c b/drivers/platform/surface/surface_dtx.c
index 2de843b7ea70..89ca6b50e812 100644
--- a/drivers/platform/surface/surface_dtx.c
+++ b/drivers/platform/surface/surface_dtx.c
@@ -555,7 +555,6 @@ static const struct file_operations surface_dtx_fops = {
.fasync = surface_dtx_fasync,
.unlocked_ioctl = surface_dtx_ioctl,
.compat_ioctl = surface_dtx_ioctl,
- .llseek = no_llseek,
};
diff --git a/drivers/pps/pps.c b/drivers/pps/pps.c
index 5d19baae6a38..25d47907db17 100644
--- a/drivers/pps/pps.c
+++ b/drivers/pps/pps.c
@@ -319,7 +319,6 @@ static int pps_cdev_release(struct inode *inode, struct file *file)
static const struct file_operations pps_cdev_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.poll = pps_cdev_poll,
.fasync = pps_cdev_fasync,
.compat_ioctl = pps_cdev_compat_ioctl,
diff --git a/drivers/rtc/dev.c b/drivers/rtc/dev.c
index 4aad9bb99868..c4a3ab53dcd4 100644
--- a/drivers/rtc/dev.c
+++ b/drivers/rtc/dev.c
@@ -523,7 +523,6 @@ static int rtc_dev_release(struct inode *inode, struct file *file)
static const struct file_operations rtc_dev_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.read = rtc_dev_read,
.poll = rtc_dev_poll,
.unlocked_ioctl = rtc_dev_ioctl,
diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c
index 0013bff0447d..1f58ae8b151e 100644
--- a/drivers/rtc/rtc-m41t80.c
+++ b/drivers/rtc/rtc-m41t80.c
@@ -850,7 +850,6 @@ static const struct file_operations wdt_fops = {
.write = wdt_write,
.open = wdt_open,
.release = wdt_release,
- .llseek = no_llseek,
};
static struct miscdevice wdt_dev = {
diff --git a/drivers/s390/char/fs3270.c b/drivers/s390/char/fs3270.c
index 61515781c5dd..cfe7efd5b5da 100644
--- a/drivers/s390/char/fs3270.c
+++ b/drivers/s390/char/fs3270.c
@@ -515,7 +515,6 @@ static const struct file_operations fs3270_fops = {
.compat_ioctl = fs3270_ioctl, /* ioctl */
.open = fs3270_open, /* open */
.release = fs3270_close, /* release */
- .llseek = no_llseek,
};
static void fs3270_create_cb(int minor)
diff --git a/drivers/s390/char/sclp_ctl.c b/drivers/s390/char/sclp_ctl.c
index 248b5db3eaa8..dd6051602070 100644
--- a/drivers/s390/char/sclp_ctl.c
+++ b/drivers/s390/char/sclp_ctl.c
@@ -115,7 +115,6 @@ static const struct file_operations sclp_ctl_fops = {
.open = nonseekable_open,
.unlocked_ioctl = sclp_ctl_ioctl,
.compat_ioctl = sclp_ctl_ioctl,
- .llseek = no_llseek,
};
/*
diff --git a/drivers/s390/char/tape_char.c b/drivers/s390/char/tape_char.c
index cc8237afeffa..89778d922d9f 100644
--- a/drivers/s390/char/tape_char.c
+++ b/drivers/s390/char/tape_char.c
@@ -52,7 +52,6 @@ static const struct file_operations tape_fops =
#endif
.open = tapechar_open,
.release = tapechar_release,
- .llseek = no_llseek,
};
static int tapechar_major = TAPECHAR_MAJOR;
diff --git a/drivers/s390/char/uvdevice.c b/drivers/s390/char/uvdevice.c
index 42c9f77f8da0..f598edc5f251 100644
--- a/drivers/s390/char/uvdevice.c
+++ b/drivers/s390/char/uvdevice.c
@@ -448,7 +448,6 @@ static long uvio_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
static const struct file_operations uvio_dev_fops = {
.owner = THIS_MODULE,
.unlocked_ioctl = uvio_ioctl,
- .llseek = no_llseek,
};
static struct miscdevice uvio_dev_miscdev = {
diff --git a/drivers/s390/char/vmcp.c b/drivers/s390/char/vmcp.c
index eb0520a9d4af..c6d58335beb4 100644
--- a/drivers/s390/char/vmcp.c
+++ b/drivers/s390/char/vmcp.c
@@ -242,7 +242,6 @@ static const struct file_operations vmcp_fops = {
.write = vmcp_write,
.unlocked_ioctl = vmcp_ioctl,
.compat_ioctl = vmcp_ioctl,
- .llseek = no_llseek,
};
static struct miscdevice vmcp_dev = {
diff --git a/drivers/s390/char/vmlogrdr.c b/drivers/s390/char/vmlogrdr.c
index c09e1e09fb66..bd5cecc44123 100644
--- a/drivers/s390/char/vmlogrdr.c
+++ b/drivers/s390/char/vmlogrdr.c
@@ -96,7 +96,6 @@ static const struct file_operations vmlogrdr_fops = {
.open = vmlogrdr_open,
.release = vmlogrdr_release,
.read = vmlogrdr_read,
- .llseek = no_llseek,
};
diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c
index 0969fa01df58..33cebb91b933 100644
--- a/drivers/s390/char/zcore.c
+++ b/drivers/s390/char/zcore.c
@@ -165,7 +165,6 @@ static const struct file_operations zcore_reipl_fops = {
.write = zcore_reipl_write,
.open = zcore_reipl_open,
.release = zcore_reipl_release,
- .llseek = no_llseek,
};
static ssize_t zcore_hsa_read(struct file *filp, char __user *buf,
@@ -200,7 +199,6 @@ static const struct file_operations zcore_hsa_fops = {
.write = zcore_hsa_write,
.read = zcore_hsa_read,
.open = nonseekable_open,
- .llseek = no_llseek,
};
static int __init check_sdias(void)
diff --git a/drivers/s390/cio/chsc_sch.c b/drivers/s390/cio/chsc_sch.c
index e6c800653f98..1e58ee3cc87d 100644
--- a/drivers/s390/cio/chsc_sch.c
+++ b/drivers/s390/cio/chsc_sch.c
@@ -924,7 +924,6 @@ static const struct file_operations chsc_fops = {
.release = chsc_release,
.unlocked_ioctl = chsc_ioctl,
.compat_ioctl = chsc_ioctl,
- .llseek = no_llseek,
};
static struct miscdevice chsc_misc_device = {
diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c
index 53b68f8c32f3..7b59d20bf785 100644
--- a/drivers/s390/cio/css.c
+++ b/drivers/s390/cio/css.c
@@ -1332,7 +1332,6 @@ static ssize_t cio_settle_write(struct file *file, const char __user *buf,
static const struct proc_ops cio_settle_proc_ops = {
.proc_open = nonseekable_open,
.proc_write = cio_settle_write,
- .proc_lseek = no_llseek,
};
static int __init cio_settle_init(void)
diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c
index c20251e00cf9..3a39e167bdbf 100644
--- a/drivers/s390/crypto/pkey_api.c
+++ b/drivers/s390/crypto/pkey_api.c
@@ -776,7 +776,6 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
static const struct file_operations pkey_fops = {
.owner = THIS_MODULE,
.open = nonseekable_open,
- .llseek = no_llseek,
.unlocked_ioctl = pkey_unlocked_ioctl,
};
diff --git a/drivers/s390/crypto/vfio_ap_drv.c b/drivers/s390/crypto/vfio_ap_drv.c
index 4aeb3e1213c7..67a807e2e75b 100644
--- a/drivers/s390/crypto/vfio_ap_drv.c
+++ b/drivers/s390/crypto/vfio_ap_drv.c
@@ -26,6 +26,18 @@ MODULE_LICENSE("GPL v2");
struct ap_matrix_dev *matrix_dev;
debug_info_t *vfio_ap_dbf_info;
+static ssize_t features_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "guest_matrix hotplug ap_config\n");
+}
+static DEVICE_ATTR_RO(features);
+
+static struct attribute *matrix_dev_attrs[] = {
+ &dev_attr_features.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(matrix_dev);
+
/* Only type 10 adapters (CEX4 and later) are supported
* by the AP matrix device driver
*/
@@ -68,6 +80,7 @@ static struct device_driver matrix_driver = {
.name = "vfio_ap",
.bus = &matrix_bus,
.suppress_bind_attrs = true,
+ .dev_groups = matrix_dev_groups,
};
static int vfio_ap_matrix_dev_create(void)
diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c
index f9a47b54c51a..5020696f1379 100644
--- a/drivers/s390/crypto/zcrypt_api.c
+++ b/drivers/s390/crypto/zcrypt_api.c
@@ -1908,7 +1908,6 @@ static const struct file_operations zcrypt_fops = {
#endif
.open = zcrypt_open,
.release = zcrypt_release,
- .llseek = no_llseek,
};
/*
diff --git a/drivers/sbus/char/openprom.c b/drivers/sbus/char/openprom.c
index cc178874c4a6..8643947fee8e 100644
--- a/drivers/sbus/char/openprom.c
+++ b/drivers/sbus/char/openprom.c
@@ -687,7 +687,6 @@ static int openprom_release(struct inode * inode, struct file * file)
static const struct file_operations openprom_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.unlocked_ioctl = openprom_ioctl,
.compat_ioctl = openprom_compat_ioctl,
.open = openprom_open,
diff --git a/drivers/sbus/char/uctrl.c b/drivers/sbus/char/uctrl.c
index 3c88f29f4c47..8bbed7a7afb7 100644
--- a/drivers/sbus/char/uctrl.c
+++ b/drivers/sbus/char/uctrl.c
@@ -221,7 +221,6 @@ static irqreturn_t uctrl_interrupt(int irq, void *dev_id)
static const struct file_operations uctrl_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.unlocked_ioctl = uctrl_ioctl,
.open = uctrl_open,
};
diff --git a/drivers/scsi/cxgbi/libcxgbi.h b/drivers/scsi/cxgbi/libcxgbi.h
index d92cf1dccc2f..0909b03e2497 100644
--- a/drivers/scsi/cxgbi/libcxgbi.h
+++ b/drivers/scsi/cxgbi/libcxgbi.h
@@ -485,7 +485,6 @@ struct cxgbi_device {
unsigned char nmtus;
unsigned char nports;
struct pci_dev *pdev;
- struct dentry *debugfs_root;
struct iscsi_transport *itp;
struct module *owner;
@@ -499,7 +498,6 @@ struct cxgbi_device {
unsigned int rxq_idx_cntr;
struct cxgbi_ports_map pmap;
- void (*dev_ddp_cleanup)(struct cxgbi_device *);
struct cxgbi_ppm* (*cdev2ppm)(struct cxgbi_device *);
int (*csk_ddp_set_map)(struct cxgbi_ppm *, struct cxgbi_sock *,
struct cxgbi_task_tag_info *);
@@ -512,7 +510,6 @@ struct cxgbi_device {
unsigned int, int);
void (*csk_release_offload_resources)(struct cxgbi_sock *);
- int (*csk_rx_pdu_ready)(struct cxgbi_sock *, struct sk_buff *);
u32 (*csk_send_rx_credits)(struct cxgbi_sock *, u32);
int (*csk_push_tx_frames)(struct cxgbi_sock *, int);
void (*csk_send_abort_req)(struct cxgbi_sock *);
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
index feda9b54b443..4cd3a3eab6f1 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
@@ -2421,7 +2421,7 @@ out:
spin_lock_irqsave(&device->done_lock, flags);
if (test_bit(SAS_HA_FROZEN, &ha->state)) {
spin_unlock_irqrestore(&device->done_lock, flags);
- dev_info(dev, "slot complete: task(%pK) ignored\n ",
+ dev_info(dev, "slot complete: task(%pK) ignored\n",
task);
return;
}
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index a3d1013c8307..e66c3ef74267 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -37,6 +37,7 @@ static unsigned int default_timeout = IBMVFC_DEFAULT_TIMEOUT;
static u64 max_lun = IBMVFC_MAX_LUN;
static unsigned int max_targets = IBMVFC_MAX_TARGETS;
static unsigned int max_requests = IBMVFC_MAX_REQUESTS_DEFAULT;
+static u16 max_sectors = IBMVFC_MAX_SECTORS;
static u16 scsi_qdepth = IBMVFC_SCSI_QDEPTH;
static unsigned int disc_threads = IBMVFC_MAX_DISC_THREADS;
static unsigned int ibmvfc_debug = IBMVFC_DEBUG;
@@ -83,6 +84,9 @@ MODULE_PARM_DESC(default_timeout,
module_param_named(max_requests, max_requests, uint, S_IRUGO);
MODULE_PARM_DESC(max_requests, "Maximum requests for this adapter. "
"[Default=" __stringify(IBMVFC_MAX_REQUESTS_DEFAULT) "]");
+module_param_named(max_sectors, max_sectors, ushort, S_IRUGO);
+MODULE_PARM_DESC(max_sectors, "Maximum sectors for this adapter. "
+ "[Default=" __stringify(IBMVFC_MAX_SECTORS) "]");
module_param_named(scsi_qdepth, scsi_qdepth, ushort, S_IRUGO);
MODULE_PARM_DESC(scsi_qdepth, "Maximum scsi command depth per adapter queue. "
"[Default=" __stringify(IBMVFC_SCSI_QDEPTH) "]");
@@ -1494,7 +1498,7 @@ static void ibmvfc_set_login_info(struct ibmvfc_host *vhost)
memset(login_info, 0, sizeof(*login_info));
login_info->ostype = cpu_to_be32(IBMVFC_OS_LINUX);
- login_info->max_dma_len = cpu_to_be64(IBMVFC_MAX_SECTORS << 9);
+ login_info->max_dma_len = cpu_to_be64(max_sectors << 9);
login_info->max_payload = cpu_to_be32(sizeof(struct ibmvfc_fcp_cmd_iu));
login_info->max_response = cpu_to_be32(sizeof(struct ibmvfc_fcp_rsp));
login_info->partition_num = cpu_to_be32(vhost->partition_number);
@@ -5230,7 +5234,7 @@ static void ibmvfc_npiv_login_done(struct ibmvfc_event *evt)
}
vhost->logged_in = 1;
- npiv_max_sectors = min((uint)(be64_to_cpu(rsp->max_dma_len) >> 9), IBMVFC_MAX_SECTORS);
+ npiv_max_sectors = min((uint)(be64_to_cpu(rsp->max_dma_len) >> 9), max_sectors);
dev_info(vhost->dev, "Host partition: %s, device: %s %s %s max sectors %u\n",
rsp->partition_name, rsp->device_name, rsp->port_loc_code,
rsp->drc_name, npiv_max_sectors);
@@ -6329,7 +6333,7 @@ static int ibmvfc_probe(struct vio_dev *vdev, const struct vio_device_id *id)
shost->can_queue = scsi_qdepth;
shost->max_lun = max_lun;
shost->max_id = max_targets;
- shost->max_sectors = IBMVFC_MAX_SECTORS;
+ shost->max_sectors = max_sectors;
shost->max_cmd_len = IBMVFC_MAX_CDB_LEN;
shost->unique_id = shost->host_no;
shost->nr_hw_queues = mq_enabled ? min(max_scsi_queues, nr_scsi_hw_queues) : 1;
@@ -6556,6 +6560,7 @@ static struct fc_function_template ibmvfc_transport_functions = {
**/
static int __init ibmvfc_module_init(void)
{
+ int min_max_sectors = PAGE_SIZE >> 9;
int rc;
if (!firmware_has_feature(FW_FEATURE_VIO))
@@ -6564,6 +6569,16 @@ static int __init ibmvfc_module_init(void)
printk(KERN_INFO IBMVFC_NAME": IBM Virtual Fibre Channel Driver version: %s %s\n",
IBMVFC_DRIVER_VERSION, IBMVFC_DRIVER_DATE);
+ /*
+ * Range check the max_sectors module parameter. The upper bounds is
+ * implicity checked since the parameter is a ushort.
+ */
+ if (max_sectors < min_max_sectors) {
+ printk(KERN_ERR IBMVFC_NAME ": max_sectors must be at least %d.\n",
+ min_max_sectors);
+ max_sectors = min_max_sectors;
+ }
+
ibmvfc_transport_template = fc_attach_transport(&ibmvfc_transport_functions);
if (!ibmvfc_transport_template)
return -ENOMEM;
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h
index 745ad5ac7251..c73ed2314ad0 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.h
+++ b/drivers/scsi/ibmvscsi/ibmvfc.h
@@ -32,7 +32,7 @@
#define IBMVFC_DEBUG 0
#define IBMVFC_MAX_TARGETS 1024
#define IBMVFC_MAX_LUN 0xffffffff
-#define IBMVFC_MAX_SECTORS 0xffffu
+#define IBMVFC_MAX_SECTORS 2048
#define IBMVFC_MAX_DISC_THREADS 4
#define IBMVFC_TGT_MEMPOOL_SZ 64
#define IBMVFC_MAX_CMDS_PER_LUN 64
diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c
index 4756a3f82531..85059b83ea6b 100644
--- a/drivers/scsi/lpfc/lpfc_bsg.c
+++ b/drivers/scsi/lpfc/lpfc_bsg.c
@@ -3208,6 +3208,9 @@ lpfc_bsg_diag_loopback_run(struct bsg_job *job)
cmdiocbq->num_bdes = num_bde;
cmdiocbq->cmd_flag |= LPFC_IO_LIBDFC;
cmdiocbq->cmd_flag |= LPFC_IO_LOOPBACK;
+ if (phba->cfg_vmid_app_header)
+ cmdiocbq->cmd_flag |= LPFC_IO_VMID;
+
cmdiocbq->vport = phba->pport;
cmdiocbq->cmd_cmpl = NULL;
cmdiocbq->bpl_dmabuf = txbmp;
diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c
index 2dedd1493e5b..134bc96dd134 100644
--- a/drivers/scsi/lpfc/lpfc_ct.c
+++ b/drivers/scsi/lpfc/lpfc_ct.c
@@ -1572,8 +1572,8 @@ lpfc_cmpl_ct_cmd_gft_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
}
}
} else
- lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
- "3065 GFT_ID failed x%08x\n", ulp_status);
+ lpfc_vlog_msg(vport, KERN_WARNING, LOG_DISCOVERY,
+ "3065 GFT_ID status x%08x\n", ulp_status);
out:
lpfc_ct_free_iocb(phba, cmdiocb);
@@ -1647,6 +1647,18 @@ lpfc_cmpl_ct(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
}
out:
+ /* If the caller wanted a synchronous DA_ID completion, signal the
+ * wait obj and clear flag to reset the vport.
+ */
+ if (ndlp->save_flags & NLP_WAIT_FOR_DA_ID) {
+ if (ndlp->da_id_waitq)
+ wake_up(ndlp->da_id_waitq);
+ }
+
+ spin_lock_irq(&ndlp->lock);
+ ndlp->save_flags &= ~NLP_WAIT_FOR_DA_ID;
+ spin_unlock_irq(&ndlp->lock);
+
lpfc_ct_free_iocb(phba, cmdiocb);
lpfc_nlp_put(ndlp);
return;
@@ -2246,7 +2258,7 @@ lpfc_cmpl_ct_disc_fdmi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
}
lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
- "0229 FDMI cmd %04x failed, latt = %d "
+ "0229 FDMI cmd %04x latt = %d "
"ulp_status: x%x, rid x%x\n",
be16_to_cpu(fdmi_cmd), latt, ulp_status,
ulp_word4);
@@ -2263,9 +2275,9 @@ lpfc_cmpl_ct_disc_fdmi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
/* Check for a CT LS_RJT response */
cmd = be16_to_cpu(fdmi_cmd);
if (be16_to_cpu(fdmi_rsp) == SLI_CT_RESPONSE_FS_RJT) {
- /* FDMI rsp failed */
+ /* Log FDMI reject */
lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY | LOG_ELS,
- "0220 FDMI cmd failed FS_RJT Data: x%x", cmd);
+ "0220 FDMI cmd FS_RJT Data: x%x", cmd);
/* Should we fallback to FDMI-2 / FDMI-1 ? */
switch (cmd) {
diff --git a/drivers/scsi/lpfc/lpfc_disc.h b/drivers/scsi/lpfc/lpfc_disc.h
index f82615d87c4b..f5ae8cc15820 100644
--- a/drivers/scsi/lpfc/lpfc_disc.h
+++ b/drivers/scsi/lpfc/lpfc_disc.h
@@ -90,6 +90,8 @@ enum lpfc_nlp_save_flags {
NLP_IN_RECOV_POST_DEV_LOSS = 0x1,
/* wait for outstanding LOGO to cmpl */
NLP_WAIT_FOR_LOGO = 0x2,
+ /* wait for outstanding DA_ID to finish */
+ NLP_WAIT_FOR_DA_ID = 0x4
};
struct lpfc_nodelist {
@@ -159,7 +161,12 @@ struct lpfc_nodelist {
uint32_t nvme_fb_size; /* NVME target's supported byte cnt */
#define NVME_FB_BIT_SHIFT 9 /* PRLI Rsp first burst in 512B units. */
uint32_t nlp_defer_did;
+
+ /* These wait objects are NPIV specific. These IOs must complete
+ * synchronously.
+ */
wait_queue_head_t *logo_waitq;
+ wait_queue_head_t *da_id_waitq;
};
struct lpfc_node_rrq {
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index de0ec945d2f1..d737b897ddd8 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -979,7 +979,7 @@ lpfc_cmpl_els_flogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
phba->fcoe_cvl_eventtag_attn =
phba->fcoe_cvl_eventtag;
lpfc_printf_log(phba, KERN_WARNING, LOG_FIP | LOG_ELS,
- "2611 FLOGI failed on FCF (x%x), "
+ "2611 FLOGI FCF (x%x), "
"status:x%x/x%x, tmo:x%x, perform "
"roundrobin FCF failover\n",
phba->fcf.current_rec.fcf_indx,
@@ -997,11 +997,11 @@ stop_rr_fcf_flogi:
if (!(ulp_status == IOSTAT_LOCAL_REJECT &&
((ulp_word4 & IOERR_PARAM_MASK) ==
IOERR_LOOP_OPEN_FAILURE)))
- lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
- "2858 FLOGI failure Status:x%x/x%x TMO"
- ":x%x Data x%lx x%x\n",
- ulp_status, ulp_word4, tmo,
- phba->hba_flag, phba->fcf.fcf_flag);
+ lpfc_vlog_msg(vport, KERN_WARNING, LOG_ELS,
+ "2858 FLOGI Status:x%x/x%x TMO"
+ ":x%x Data x%lx x%x\n",
+ ulp_status, ulp_word4, tmo,
+ phba->hba_flag, phba->fcf.fcf_flag);
/* Check for retry */
if (lpfc_els_retry(phba, cmdiocb, rspiocb)) {
@@ -1023,7 +1023,7 @@ stop_rr_fcf_flogi:
lpfc_nlp_put(ndlp);
lpfc_printf_vlog(vport, KERN_WARNING, LOG_ELS,
- "0150 FLOGI failure Status:x%x/x%x "
+ "0150 FLOGI Status:x%x/x%x "
"xri x%x TMO:x%x refcnt %d\n",
ulp_status, ulp_word4, cmdiocb->sli4_xritag,
tmo, kref_read(&ndlp->kref));
@@ -1032,11 +1032,11 @@ stop_rr_fcf_flogi:
if (!(ulp_status == IOSTAT_LOCAL_REJECT &&
((ulp_word4 & IOERR_PARAM_MASK) ==
IOERR_LOOP_OPEN_FAILURE))) {
- /* FLOGI failure */
- lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
- "0100 FLOGI failure Status:x%x/x%x "
- "TMO:x%x\n",
- ulp_status, ulp_word4, tmo);
+ /* Warn FLOGI status */
+ lpfc_vlog_msg(vport, KERN_WARNING, LOG_ELS,
+ "0100 FLOGI Status:x%x/x%x "
+ "TMO:x%x\n",
+ ulp_status, ulp_word4, tmo);
goto flogifail;
}
@@ -1964,16 +1964,16 @@ lpfc_cmpl_els_rrq(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
if (ulp_status) {
/* Check for retry */
- /* RRQ failed Don't print the vport to vport rjts */
+ /* Warn RRQ status Don't print the vport to vport rjts */
if (ulp_status != IOSTAT_LS_RJT ||
(((ulp_word4) >> 16 != LSRJT_INVALID_CMD) &&
((ulp_word4) >> 16 != LSRJT_UNABLE_TPC)) ||
(phba)->pport->cfg_log_verbose & LOG_ELS)
- lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
- "2881 RRQ failure DID:%06X Status:"
- "x%x/x%x\n",
- ndlp->nlp_DID, ulp_status,
- ulp_word4);
+ lpfc_vlog_msg(vport, KERN_WARNING, LOG_ELS,
+ "2881 RRQ DID:%06X Status:"
+ "x%x/x%x\n",
+ ndlp->nlp_DID, ulp_status,
+ ulp_word4);
}
lpfc_clr_rrq_active(phba, rrq->xritag, rrq);
@@ -2077,16 +2077,16 @@ lpfc_cmpl_els_plogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
}
goto out;
}
- /* PLOGI failed Don't print the vport to vport rjts */
+ /* Warn PLOGI status Don't print the vport to vport rjts */
if (ulp_status != IOSTAT_LS_RJT ||
(((ulp_word4) >> 16 != LSRJT_INVALID_CMD) &&
((ulp_word4) >> 16 != LSRJT_UNABLE_TPC)) ||
(phba)->pport->cfg_log_verbose & LOG_ELS)
- lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
- "2753 PLOGI failure DID:%06X "
- "Status:x%x/x%x\n",
- ndlp->nlp_DID, ulp_status,
- ulp_word4);
+ lpfc_vlog_msg(vport, KERN_WARNING, LOG_ELS,
+ "2753 PLOGI DID:%06X "
+ "Status:x%x/x%x\n",
+ ndlp->nlp_DID, ulp_status,
+ ulp_word4);
/* Do not call DSM for lpfc_els_abort'ed ELS cmds */
if (!lpfc_error_lost_link(vport, ulp_status, ulp_word4))
@@ -2323,7 +2323,6 @@ lpfc_cmpl_els_prli(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
struct lpfc_vport *vport = cmdiocb->vport;
struct lpfc_nodelist *ndlp;
char *mode;
- u32 loglevel;
u32 ulp_status;
u32 ulp_word4;
bool release_node = false;
@@ -2372,17 +2371,14 @@ lpfc_cmpl_els_prli(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
* could be expected.
*/
if (test_bit(FC_FABRIC, &vport->fc_flag) ||
- vport->cfg_enable_fc4_type != LPFC_ENABLE_BOTH) {
- mode = KERN_ERR;
- loglevel = LOG_TRACE_EVENT;
- } else {
+ vport->cfg_enable_fc4_type != LPFC_ENABLE_BOTH)
+ mode = KERN_WARNING;
+ else
mode = KERN_INFO;
- loglevel = LOG_ELS;
- }
- /* PRLI failed */
- lpfc_printf_vlog(vport, mode, loglevel,
- "2754 PRLI failure DID:%06X Status:x%x/x%x, "
+ /* Warn PRLI status */
+ lpfc_printf_vlog(vport, mode, LOG_ELS,
+ "2754 PRLI DID:%06X Status:x%x/x%x, "
"data: x%x x%x x%x\n",
ndlp->nlp_DID, ulp_status,
ulp_word4, ndlp->nlp_state,
@@ -2854,11 +2850,11 @@ lpfc_cmpl_els_adisc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
}
goto out;
}
- /* ADISC failed */
- lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
- "2755 ADISC failure DID:%06X Status:x%x/x%x\n",
- ndlp->nlp_DID, ulp_status,
- ulp_word4);
+ /* Warn ADISC status */
+ lpfc_vlog_msg(vport, KERN_WARNING, LOG_ELS,
+ "2755 ADISC DID:%06X Status:x%x/x%x\n",
+ ndlp->nlp_DID, ulp_status,
+ ulp_word4);
lpfc_disc_state_machine(vport, ndlp, cmdiocb,
NLP_EVT_CMPL_ADISC);
@@ -3045,12 +3041,12 @@ lpfc_cmpl_els_logo(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
* discovery. The PLOGI will retry.
*/
if (ulp_status) {
- /* LOGO failed */
- lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
- "2756 LOGO failure, No Retry DID:%06X "
- "Status:x%x/x%x\n",
- ndlp->nlp_DID, ulp_status,
- ulp_word4);
+ /* Warn LOGO status */
+ lpfc_vlog_msg(vport, KERN_WARNING, LOG_ELS,
+ "2756 LOGO, No Retry DID:%06X "
+ "Status:x%x/x%x\n",
+ ndlp->nlp_DID, ulp_status,
+ ulp_word4);
if (lpfc_error_lost_link(vport, ulp_status, ulp_word4))
skip_recovery = 1;
@@ -4837,11 +4833,10 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
if ((phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) &&
(cmd == ELS_CMD_FDISC) &&
(stat.un.b.lsRjtRsnCodeExp == LSEXP_OUT_OF_RESOURCE)){
- lpfc_printf_vlog(vport, KERN_ERR,
- LOG_TRACE_EVENT,
- "0125 FDISC Failed (x%x). "
- "Fabric out of resources\n",
- stat.un.lsRjtError);
+ lpfc_vlog_msg(vport, KERN_WARNING, LOG_ELS,
+ "0125 FDISC (x%x). "
+ "Fabric out of resources\n",
+ stat.un.lsRjtError);
lpfc_vport_set_state(vport,
FC_VPORT_NO_FABRIC_RSCS);
}
@@ -4877,11 +4872,10 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
LSEXP_NOTHING_MORE) {
vport->fc_sparam.cmn.bbRcvSizeMsb &= 0xf;
retry = 1;
- lpfc_printf_vlog(vport, KERN_ERR,
- LOG_TRACE_EVENT,
- "0820 FLOGI Failed (x%x). "
- "BBCredit Not Supported\n",
- stat.un.lsRjtError);
+ lpfc_vlog_msg(vport, KERN_WARNING, LOG_ELS,
+ "0820 FLOGI (x%x). "
+ "BBCredit Not Supported\n",
+ stat.un.lsRjtError);
}
break;
@@ -4891,11 +4885,10 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
((stat.un.b.lsRjtRsnCodeExp == LSEXP_INVALID_PNAME) ||
(stat.un.b.lsRjtRsnCodeExp == LSEXP_INVALID_NPORT_ID))
) {
- lpfc_printf_vlog(vport, KERN_ERR,
- LOG_TRACE_EVENT,
- "0122 FDISC Failed (x%x). "
- "Fabric Detected Bad WWN\n",
- stat.un.lsRjtError);
+ lpfc_vlog_msg(vport, KERN_WARNING, LOG_ELS,
+ "0122 FDISC (x%x). "
+ "Fabric Detected Bad WWN\n",
+ stat.un.lsRjtError);
lpfc_vport_set_state(vport,
FC_VPORT_FABRIC_REJ_WWN);
}
@@ -5355,8 +5348,8 @@ lpfc_cmpl_els_rsp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
u32 ulp_status, ulp_word4, tmo, did, iotag;
if (!vport) {
- lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
- "3177 ELS response failed\n");
+ lpfc_printf_log(phba, KERN_WARNING, LOG_ELS,
+ "3177 null vport in ELS rsp\n");
goto out;
}
if (cmdiocb->context_un.mbox)
@@ -9658,11 +9651,12 @@ lpfc_els_flush_cmd(struct lpfc_vport *vport)
if (piocb->cmd_flag & LPFC_DRIVER_ABORTED && !mbx_tmo_err)
continue;
- /* On the ELS ring we can have ELS_REQUESTs or
- * GEN_REQUESTs waiting for a response.
+ /* On the ELS ring we can have ELS_REQUESTs, ELS_RSPs,
+ * or GEN_REQUESTs waiting for a CQE response.
*/
ulp_command = get_job_cmnd(phba, piocb);
- if (ulp_command == CMD_ELS_REQUEST64_CR) {
+ if (ulp_command == CMD_ELS_REQUEST64_WQE ||
+ ulp_command == CMD_XMIT_ELS_RSP64_WQE) {
list_add_tail(&piocb->dlist, &abort_list);
/* If the link is down when flushing ELS commands
@@ -11327,10 +11321,10 @@ lpfc_cmpl_els_fdisc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
/* Check for retry */
if (lpfc_els_retry(phba, cmdiocb, rspiocb))
goto out;
- /* FDISC failed */
- lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
- "0126 FDISC failed. (x%x/x%x)\n",
- ulp_status, ulp_word4);
+ /* Warn FDISC status */
+ lpfc_vlog_msg(vport, KERN_WARNING, LOG_ELS,
+ "0126 FDISC cmpl status: x%x/x%x)\n",
+ ulp_status, ulp_word4);
goto fdisc_failed;
}
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index 35c9181c6608..9241075f72fa 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -527,6 +527,9 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp)
* the following lpfc_nlp_put is necessary after fabric node is
* recovered.
*/
+ spin_lock_irqsave(&ndlp->lock, iflags);
+ ndlp->nlp_flag &= ~NLP_IN_DEV_LOSS;
+ spin_unlock_irqrestore(&ndlp->lock, iflags);
if (recovering) {
lpfc_printf_vlog(vport, KERN_INFO,
LOG_DISCOVERY | LOG_NODE,
@@ -539,6 +542,7 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp)
spin_lock_irqsave(&ndlp->lock, iflags);
ndlp->save_flags |= NLP_IN_RECOV_POST_DEV_LOSS;
spin_unlock_irqrestore(&ndlp->lock, iflags);
+ return fcf_inuse;
} else if (ndlp->nlp_state == NLP_STE_UNMAPPED_NODE) {
/* Fabric node fully recovered before this dev_loss_tmo
* queue work is processed. Thus, ignore the
@@ -552,15 +556,9 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp)
ndlp->nlp_DID, kref_read(&ndlp->kref),
ndlp, ndlp->nlp_flag,
vport->port_state);
- spin_lock_irqsave(&ndlp->lock, iflags);
- ndlp->nlp_flag &= ~NLP_IN_DEV_LOSS;
- spin_unlock_irqrestore(&ndlp->lock, iflags);
return fcf_inuse;
}
- spin_lock_irqsave(&ndlp->lock, iflags);
- ndlp->nlp_flag &= ~NLP_IN_DEV_LOSS;
- spin_unlock_irqrestore(&ndlp->lock, iflags);
lpfc_nlp_put(ndlp);
return fcf_inuse;
}
diff --git a/drivers/scsi/lpfc/lpfc_hw.h b/drivers/scsi/lpfc/lpfc_hw.h
index 2108b4cb7815..d5c15742f7f2 100644
--- a/drivers/scsi/lpfc/lpfc_hw.h
+++ b/drivers/scsi/lpfc/lpfc_hw.h
@@ -562,6 +562,27 @@ struct fc_vft_header {
#include <uapi/scsi/fc/fc_els.h>
/*
+ * Application Header
+ */
+struct fc_app_header {
+ uint32_t dst_app_id;
+ uint32_t src_app_id;
+#define LOOPBACK_SRC_APPID 0x4321
+ uint32_t word2;
+ uint32_t word3;
+};
+
+/*
+ * dfctl optional header definition
+ */
+enum lpfc_fc_dfctl {
+ LPFC_FC_NO_DEVICE_HEADER,
+ LPFC_FC_16B_DEVICE_HEADER,
+ LPFC_FC_32B_DEVICE_HEADER,
+ LPFC_FC_64B_DEVICE_HEADER,
+};
+
+/*
* Extended Link Service LS_COMMAND codes (Payload Word 0)
*/
#ifdef __BIG_ENDIAN_BITFIELD
diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h
index 500253007b1d..26e1313ebb21 100644
--- a/drivers/scsi/lpfc/lpfc_hw4.h
+++ b/drivers/scsi/lpfc/lpfc_hw4.h
@@ -4847,6 +4847,7 @@ struct fcp_iwrite64_wqe {
#define cmd_buff_len_SHIFT 16
#define cmd_buff_len_MASK 0x00000ffff
#define cmd_buff_len_WORD word3
+/* Note: payload_offset_len field depends on ASIC support */
#define payload_offset_len_SHIFT 0
#define payload_offset_len_MASK 0x0000ffff
#define payload_offset_len_WORD word3
@@ -4863,6 +4864,7 @@ struct fcp_iread64_wqe {
#define cmd_buff_len_SHIFT 16
#define cmd_buff_len_MASK 0x00000ffff
#define cmd_buff_len_WORD word3
+/* Note: payload_offset_len field depends on ASIC support */
#define payload_offset_len_SHIFT 0
#define payload_offset_len_MASK 0x0000ffff
#define payload_offset_len_WORD word3
@@ -4879,6 +4881,7 @@ struct fcp_icmnd64_wqe {
#define cmd_buff_len_SHIFT 16
#define cmd_buff_len_MASK 0x00000ffff
#define cmd_buff_len_WORD word3
+/* Note: payload_offset_len field depends on ASIC support */
#define payload_offset_len_SHIFT 0
#define payload_offset_len_MASK 0x0000ffff
#define payload_offset_len_WORD word3
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 50620918becd..0dd451009b07 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -4699,6 +4699,7 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev)
uint64_t wwn;
bool use_no_reset_hba = false;
int rc;
+ u8 if_type;
if (lpfc_no_hba_reset_cnt) {
if (phba->sli_rev < LPFC_SLI_REV4 &&
@@ -4773,10 +4774,24 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev)
shost->max_id = LPFC_MAX_TARGET;
shost->max_lun = vport->cfg_max_luns;
shost->this_id = -1;
- if (phba->sli_rev == LPFC_SLI_REV4)
- shost->max_cmd_len = LPFC_FCP_CDB_LEN_32;
- else
+
+ /* Set max_cmd_len applicable to ASIC support */
+ if (phba->sli_rev == LPFC_SLI_REV4) {
+ if_type = bf_get(lpfc_sli_intf_if_type,
+ &phba->sli4_hba.sli_intf);
+ switch (if_type) {
+ case LPFC_SLI_INTF_IF_TYPE_2:
+ fallthrough;
+ case LPFC_SLI_INTF_IF_TYPE_6:
+ shost->max_cmd_len = LPFC_FCP_CDB_LEN_32;
+ break;
+ default:
+ shost->max_cmd_len = LPFC_FCP_CDB_LEN;
+ break;
+ }
+ } else {
shost->max_cmd_len = LPFC_FCP_CDB_LEN;
+ }
if (phba->sli_rev == LPFC_SLI_REV4) {
if (!phba->cfg_fcp_mq_threshold ||
@@ -10436,6 +10451,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
struct lpfc_vector_map_info *cpup;
struct lpfc_vector_map_info *eqcpup;
struct lpfc_eq_intr_info *eqi;
+ u32 wqesize;
/*
* Create HBA Record arrays.
@@ -10655,9 +10671,15 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
* Create ELS Work Queues
*/
- /* Create slow-path ELS Work Queue */
+ /*
+ * Create slow-path ELS Work Queue.
+ * Increase the ELS WQ size when WQEs contain an embedded cdb
+ */
+ wqesize = (phba->fcp_embed_io) ?
+ LPFC_WQE128_SIZE : phba->sli4_hba.wq_esize;
+
qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE,
- phba->sli4_hba.wq_esize,
+ wqesize,
phba->sli4_hba.wq_ecount, cpu);
if (!qdesc) {
lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index 60cd60ebff38..0eaede8275da 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -4760,7 +4760,7 @@ static int lpfc_scsi_prep_cmnd_buf_s4(struct lpfc_vport *vport,
/* Word 3 */
bf_set(payload_offset_len, &wqe->fcp_icmd,
- sizeof(struct fcp_cmnd32) + sizeof(struct fcp_rsp));
+ sizeof(struct fcp_cmnd) + sizeof(struct fcp_rsp));
/* Word 6 */
bf_set(wqe_ctxt_tag, &wqe->generic.wqe_com,
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 332b8d2348e9..2ec6e55771b4 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -1940,12 +1940,15 @@ lpfc_issue_cmf_sync_wqe(struct lpfc_hba *phba, u32 ms, u64 total)
atot = atomic_xchg(&phba->cgn_sync_alarm_cnt, 0);
wtot = atomic_xchg(&phba->cgn_sync_warn_cnt, 0);
+ spin_lock_irqsave(&phba->hbalock, iflags);
+
/* ONLY Managed mode will send the CMF_SYNC_WQE to the HBA */
if (phba->cmf_active_mode != LPFC_CFG_MANAGED ||
- phba->link_state == LPFC_LINK_DOWN)
- return 0;
+ phba->link_state < LPFC_LINK_UP) {
+ ret_val = 0;
+ goto out_unlock;
+ }
- spin_lock_irqsave(&phba->hbalock, iflags);
sync_buf = __lpfc_sli_get_iocbq(phba);
if (!sync_buf) {
lpfc_printf_log(phba, KERN_ERR, LOG_CGN_MGMT,
@@ -8818,7 +8821,7 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
rc = lpfc_sli4_queue_setup(phba);
if (unlikely(rc)) {
lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
- "0381 Error %d during queue setup.\n ", rc);
+ "0381 Error %d during queue setup.\n", rc);
goto out_stop_timers;
}
/* Initialize the driver internal SLI layer lists. */
@@ -11090,9 +11093,17 @@ __lpfc_sli_prep_xmit_seq64_s4(struct lpfc_iocbq *cmdiocbq,
/* Word 9 */
bf_set(wqe_rcvoxid, &wqe->xmit_sequence.wqe_com, ox_id);
- /* Word 12 */
- if (cmdiocbq->cmd_flag & (LPFC_IO_LIBDFC | LPFC_IO_LOOPBACK))
+ if (cmdiocbq->cmd_flag & (LPFC_IO_LIBDFC | LPFC_IO_LOOPBACK)) {
+ /* Word 10 */
+ if (cmdiocbq->cmd_flag & LPFC_IO_VMID) {
+ bf_set(wqe_appid, &wqe->xmit_sequence.wqe_com, 1);
+ bf_set(wqe_wqes, &wqe->xmit_sequence.wqe_com, 1);
+ wqe->words[31] = LOOPBACK_SRC_APPID;
+ }
+
+ /* Word 12 */
wqe->xmit_sequence.xmit_len = full_size;
+ }
else
wqe->xmit_sequence.xmit_len =
wqe->xmit_sequence.bde.tus.f.bdeSize;
@@ -18431,6 +18442,7 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr)
{
/* make rctl_names static to save stack space */
struct fc_vft_header *fc_vft_hdr;
+ struct fc_app_header *fc_app_hdr;
uint32_t *header = (uint32_t *) fc_hdr;
#define FC_RCTL_MDS_DIAGS 0xF4
@@ -18486,6 +18498,32 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr)
goto drop;
}
+ if (unlikely(phba->link_flag == LS_LOOPBACK_MODE &&
+ phba->cfg_vmid_app_header)) {
+ /* Application header is 16B device header */
+ if (fc_hdr->fh_df_ctl & LPFC_FC_16B_DEVICE_HEADER) {
+ fc_app_hdr = (struct fc_app_header *) (fc_hdr + 1);
+ if (be32_to_cpu(fc_app_hdr->src_app_id) !=
+ LOOPBACK_SRC_APPID) {
+ lpfc_printf_log(phba, KERN_WARNING,
+ LOG_ELS | LOG_LIBDFC,
+ "1932 Loopback src app id "
+ "not matched, app_id:x%x\n",
+ be32_to_cpu(fc_app_hdr->src_app_id));
+
+ goto drop;
+ }
+ } else {
+ lpfc_printf_log(phba, KERN_WARNING,
+ LOG_ELS | LOG_LIBDFC,
+ "1933 Loopback df_ctl bit not set, "
+ "df_ctl:x%x\n",
+ fc_hdr->fh_df_ctl);
+
+ goto drop;
+ }
+ }
+
lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
"2538 Received frame rctl:x%x, type:x%x, "
"frame Data:%08x %08x %08x %08x %08x %08x %08x\n",
@@ -21149,7 +21187,7 @@ lpfc_drain_txq(struct lpfc_hba *phba)
if (!piocbq) {
spin_unlock_irqrestore(&pring->ring_lock, iflags);
lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
- "2823 txq empty and txq_cnt is %d\n ",
+ "2823 txq empty and txq_cnt is %d\n",
txq_cnt);
break;
}
diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h
index 2fe0386a1fee..e70f163fab90 100644
--- a/drivers/scsi/lpfc/lpfc_version.h
+++ b/drivers/scsi/lpfc/lpfc_version.h
@@ -20,7 +20,7 @@
* included with this package. *
*******************************************************************/
-#define LPFC_DRIVER_VERSION "14.4.0.4"
+#define LPFC_DRIVER_VERSION "14.4.0.5"
#define LPFC_DRIVER_NAME "lpfc"
/* Used for SLI 2/3 */
diff --git a/drivers/scsi/lpfc/lpfc_vport.c b/drivers/scsi/lpfc/lpfc_vport.c
index 4439167a5188..7a4d4d8e2ad5 100644
--- a/drivers/scsi/lpfc/lpfc_vport.c
+++ b/drivers/scsi/lpfc/lpfc_vport.c
@@ -626,6 +626,7 @@ lpfc_vport_delete(struct fc_vport *fc_vport)
struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
struct lpfc_hba *phba = vport->phba;
int rc;
+ DECLARE_WAIT_QUEUE_HEAD_ONSTACK(waitq);
if (vport->port_type == LPFC_PHYSICAL_PORT) {
lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
@@ -679,21 +680,49 @@ lpfc_vport_delete(struct fc_vport *fc_vport)
if (!ndlp)
goto skip_logo;
+ /* Send the DA_ID and Fabric LOGO to cleanup the NPIV fabric entries. */
if (ndlp && ndlp->nlp_state == NLP_STE_UNMAPPED_NODE &&
phba->link_state >= LPFC_LINK_UP &&
phba->fc_topology != LPFC_TOPOLOGY_LOOP) {
if (vport->cfg_enable_da_id) {
- /* Send DA_ID and wait for a completion. */
+ /* Send DA_ID and wait for a completion. This is best
+ * effort. If the DA_ID fails, likely the fabric will
+ * "leak" NportIDs but at least the driver issued the
+ * command.
+ */
+ ndlp = lpfc_findnode_did(vport, NameServer_DID);
+ if (!ndlp)
+ goto issue_logo;
+
+ spin_lock_irq(&ndlp->lock);
+ ndlp->da_id_waitq = &waitq;
+ ndlp->save_flags |= NLP_WAIT_FOR_DA_ID;
+ spin_unlock_irq(&ndlp->lock);
+
rc = lpfc_ns_cmd(vport, SLI_CTNS_DA_ID, 0, 0);
- if (rc) {
- lpfc_printf_log(vport->phba, KERN_WARNING,
- LOG_VPORT,
- "1829 CT command failed to "
- "delete objects on fabric, "
- "rc %d\n", rc);
+ if (!rc) {
+ wait_event_timeout(waitq,
+ !(ndlp->save_flags & NLP_WAIT_FOR_DA_ID),
+ msecs_to_jiffies(phba->fc_ratov * 2000));
}
+
+ lpfc_printf_vlog(vport, KERN_INFO, LOG_VPORT | LOG_ELS,
+ "1829 DA_ID issue status %d. "
+ "SFlag x%x NState x%x, NFlag x%x "
+ "Rpi x%x\n",
+ rc, ndlp->save_flags, ndlp->nlp_state,
+ ndlp->nlp_flag, ndlp->nlp_rpi);
+
+ /* Remove the waitq and save_flags. It no
+ * longer matters if the wake happened.
+ */
+ spin_lock_irq(&ndlp->lock);
+ ndlp->da_id_waitq = NULL;
+ ndlp->save_flags &= ~NLP_WAIT_FOR_DA_ID;
+ spin_unlock_irq(&ndlp->lock);
}
+issue_logo:
/*
* If the vpi is not registered, then a valid FDISC doesn't
* exist and there is no need for a ELS LOGO. Just cleanup
diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index 6c79c350a4d5..4ecf5284c0fc 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -6380,7 +6380,7 @@ static int megasas_init_fw(struct megasas_instance *instance)
GFP_KERNEL);
if (!fusion->stream_detect_by_ld[i]) {
dev_err(&instance->pdev->dev,
- "unable to allocate stream detect by LD\n ");
+ "unable to allocate stream detect by LD\n");
for (j = 0; j < i; ++j)
kfree(fusion->stream_detect_by_ld[j]);
kfree(fusion->stream_detect_by_ld);
diff --git a/drivers/scsi/mpi3mr/mpi/mpi30_cnfg.h b/drivers/scsi/mpi3mr/mpi/mpi30_cnfg.h
index 4b7a8f6314a3..00cd18edfad6 100644
--- a/drivers/scsi/mpi3mr/mpi/mpi30_cnfg.h
+++ b/drivers/scsi/mpi3mr/mpi/mpi30_cnfg.h
@@ -67,6 +67,7 @@
#define MPI3_SECURITY_PGAD_SLOT_GROUP_MASK (0x0000ff00)
#define MPI3_SECURITY_PGAD_SLOT_GROUP_SHIFT (8)
#define MPI3_SECURITY_PGAD_SLOT_MASK (0x000000ff)
+#define MPI3_INSTANCE_PGAD_INSTANCE_MASK (0x0000ffff)
struct mpi3_config_request {
__le16 host_tag;
u8 ioc_use_only02;
@@ -75,7 +76,8 @@ struct mpi3_config_request {
u8 ioc_use_only06;
u8 msg_flags;
__le16 change_count;
- __le16 reserved0a;
+ u8 proxy_ioc_number;
+ u8 reserved0b;
u8 page_version;
u8 page_number;
u8 page_type;
@@ -206,6 +208,9 @@ struct mpi3_config_page_header {
#define MPI3_MFGPAGE_DEVID_SAS5116_MPI_MGMT (0x00b5)
#define MPI3_MFGPAGE_DEVID_SAS5116_NVME_MGMT (0x00b6)
#define MPI3_MFGPAGE_DEVID_SAS5116_PCIE_SWITCH (0x00b8)
+#define MPI3_MFGPAGE_DEVID_SAS5248_MPI (0x00f0)
+#define MPI3_MFGPAGE_DEVID_SAS5248_MPI_NS (0x00f1)
+#define MPI3_MFGPAGE_DEVID_SAS5248_PCIE_SWITCH (0x00f2)
struct mpi3_man_page0 {
struct mpi3_config_page_header header;
u8 chip_revision[8];
@@ -1074,6 +1079,8 @@ struct mpi3_io_unit_page8 {
#define MPI3_IOUNIT8_SBSTATE_SVN_UPDATE_PENDING (0x04)
#define MPI3_IOUNIT8_SBSTATE_KEY_UPDATE_PENDING (0x02)
#define MPI3_IOUNIT8_SBSTATE_SECURE_BOOT_ENABLED (0x01)
+#define MPI3_IOUNIT8_SBMODE_CURRENT_KEY_IOUNIT17 (0x10)
+#define MPI3_IOUNIT8_SBMODE_HARD_SECURE_RECERTIFIED (0x08)
struct mpi3_io_unit_page9 {
struct mpi3_config_page_header header;
__le32 flags;
@@ -1089,6 +1096,8 @@ struct mpi3_io_unit_page9 {
#define MPI3_IOUNIT9_FLAGS_UBM_ENCLOSURE_ORDER_BACKPLANE_TYPE (0x00000004)
#define MPI3_IOUNIT9_FLAGS_VDFIRST_ENABLED (0x00000001)
#define MPI3_IOUNIT9_FIRSTDEVICE_UNKNOWN (0xffff)
+#define MPI3_IOUNIT9_FIRSTDEVICE_IN_DRIVER_PAGE_0 (0xfffe)
+
struct mpi3_io_unit_page10 {
struct mpi3_config_page_header header;
u8 flags;
@@ -1224,6 +1233,19 @@ struct mpi3_io_unit_page15 {
#define MPI3_IOUNIT15_FLAGS_EPRSUPPORT_WITHOUT_POWER_BRAKE_GPIO (0x01)
#define MPI3_IOUNIT15_FLAGS_EPRSUPPORT_WITH_POWER_BRAKE_GPIO (0x02)
#define MPI3_IOUNIT15_NUMPOWERBUDGETDATA_POWER_BUDGETING_DISABLED (0x00)
+
+struct mpi3_io_unit_page17 {
+ struct mpi3_config_page_header header;
+ u8 num_instances;
+ u8 instance;
+ __le16 reserved0a;
+ __le32 reserved0c[4];
+ __le16 key_length;
+ u8 encryption_algorithm;
+ u8 reserved1f;
+ __le32 current_key[];
+};
+#define MPI3_IOUNIT17_PAGEVERSION (0x00)
struct mpi3_ioc_page0 {
struct mpi3_config_page_header header;
__le32 reserved08;
@@ -1311,7 +1333,7 @@ struct mpi3_driver_page0 {
u8 tur_interval;
u8 reserved10;
u8 security_key_timeout;
- __le16 reserved12;
+ __le16 first_device;
__le32 reserved14;
__le32 reserved18;
};
@@ -1324,10 +1346,13 @@ struct mpi3_driver_page0 {
#define MPI3_DRIVER0_BSDOPTS_REGISTRATION_IOC_AND_DEVS (0x00000000)
#define MPI3_DRIVER0_BSDOPTS_REGISTRATION_IOC_ONLY (0x00000001)
#define MPI3_DRIVER0_BSDOPTS_REGISTRATION_IOC_AND_INTERNAL_DEVS (0x00000002)
+#define MPI3_DRIVER0_FIRSTDEVICE_IGNORE1 (0x0000)
+#define MPI3_DRIVER0_FIRSTDEVICE_IGNORE2 (0xffff)
struct mpi3_driver_page1 {
struct mpi3_config_page_header header;
__le32 flags;
- __le32 reserved0c;
+ u8 time_stamp_update;
+ u8 reserved0d[3];
__le16 host_diag_trace_max_size;
__le16 host_diag_trace_min_size;
__le16 host_diag_trace_decrement_size;
@@ -2347,6 +2372,10 @@ struct mpi3_device0_vd_format {
#define MPI3_DEVICE0_VD_DEVICE_INFO_SAS (0x0001)
#define MPI3_DEVICE0_VD_FLAGS_IO_THROTTLE_GROUP_QD_MASK (0xf000)
#define MPI3_DEVICE0_VD_FLAGS_IO_THROTTLE_GROUP_QD_SHIFT (12)
+#define MPI3_DEVICE0_VD_FLAGS_OSEXPOSURE_MASK (0x0003)
+#define MPI3_DEVICE0_VD_FLAGS_OSEXPOSURE_HDD (0x0000)
+#define MPI3_DEVICE0_VD_FLAGS_OSEXPOSURE_SSD (0x0001)
+#define MPI3_DEVICE0_VD_FLAGS_OSEXPOSURE_NO_GUIDANCE (0x0002)
union mpi3_device0_dev_spec_format {
struct mpi3_device0_sas_sata_format sas_sata_format;
struct mpi3_device0_pcie_format pcie_format;
diff --git a/drivers/scsi/mpi3mr/mpi/mpi30_image.h b/drivers/scsi/mpi3mr/mpi/mpi30_image.h
index 7df242190135..2c6e548cbd0f 100644
--- a/drivers/scsi/mpi3mr/mpi/mpi30_image.h
+++ b/drivers/scsi/mpi3mr/mpi/mpi30_image.h
@@ -205,13 +205,14 @@ struct mpi3_encrypted_hash_entry {
u8 hash_image_type;
u8 hash_algorithm;
u8 encryption_algorithm;
- u8 reserved03;
+ u8 flags;
__le16 public_key_size;
__le16 signature_size;
__le32 public_key[MPI3_PUBLIC_KEY_MAX];
};
-
-#define MPI3_HASH_IMAGE_TYPE_KEY_WITH_SIGNATURE (0x03)
+#define MPI3_HASH_IMAGE_TYPE_KEY_WITH_HASH (0x03)
+#define MPI3_HASH_IMAGE_TYPE_KEY_WITH_HASH_1_OF_2 (0x04)
+#define MPI3_HASH_IMAGE_TYPE_KEY_WITH_HASH_2_OF_2 (0x05)
#define MPI3_HASH_ALGORITHM_VERSION_MASK (0xe0)
#define MPI3_HASH_ALGORITHM_VERSION_NONE (0x00)
#define MPI3_HASH_ALGORITHM_VERSION_SHA1 (0x20)
@@ -230,6 +231,12 @@ struct mpi3_encrypted_hash_entry {
#define MPI3_ENCRYPTION_ALGORITHM_RSA4096 (0x05)
#define MPI3_ENCRYPTION_ALGORITHM_RSA3072 (0x06)
+/* hierarchical signature system (hss) */
+#define MPI3_ENCRYPTION_ALGORITHM_ML_DSA_87 (0x0b)
+#define MPI3_ENCRYPTION_ALGORITHM_ML_DSA_65 (0x0c)
+#define MPI3_ENCRYPTION_ALGORITHM_ML_DSA_44 (0x0d)
+#define MPI3_ENCRYPTED_HASH_ENTRY_FLAGS_PAIRED_KEY_MASK (0x0f)
+
#ifndef MPI3_ENCRYPTED_HASH_ENTRY_MAX
#define MPI3_ENCRYPTED_HASH_ENTRY_MAX (1)
#endif
diff --git a/drivers/scsi/mpi3mr/mpi/mpi30_ioc.h b/drivers/scsi/mpi3mr/mpi/mpi30_ioc.h
index c9fa0d69b75f..c374867f9ba0 100644
--- a/drivers/scsi/mpi3mr/mpi/mpi30_ioc.h
+++ b/drivers/scsi/mpi3mr/mpi/mpi30_ioc.h
@@ -39,6 +39,12 @@ struct mpi3_ioc_init_request {
#define MPI3_WHOINIT_HOST_DRIVER (0x03)
#define MPI3_WHOINIT_MANUFACTURER (0x04)
+#define MPI3_IOCINIT_DRIVERCAP_OSEXPOSURE_MASK (0x00000003)
+#define MPI3_IOCINIT_DRIVERCAP_OSEXPOSURE_NO_GUIDANCE (0x00000000)
+#define MPI3_IOCINIT_DRIVERCAP_OSEXPOSURE_NO_SPECIAL (0x00000001)
+#define MPI3_IOCINIT_DRIVERCAP_OSEXPOSURE_REPORT_AS_HDD (0x00000002)
+#define MPI3_IOCINIT_DRIVERCAP_OSEXPOSURE_REPORT_AS_SSD (0x00000003)
+
struct mpi3_ioc_facts_request {
__le16 host_tag;
u8 ioc_use_only02;
@@ -140,6 +146,8 @@ struct mpi3_ioc_facts_data {
#define MPI3_IOCFACTS_EXCEPT_MANUFACT_CHECKSUM_FAIL (0x0020)
#define MPI3_IOCFACTS_EXCEPT_FW_CHECKSUM_FAIL (0x0010)
#define MPI3_IOCFACTS_EXCEPT_CONFIG_CHECKSUM_FAIL (0x0008)
+#define MPI3_IOCFACTS_EXCEPT_BLOCKING_BOOT_EVENT (0x0004)
+#define MPI3_IOCFACTS_EXCEPT_SECURITY_SELFTEST_FAILURE (0x0002)
#define MPI3_IOCFACTS_EXCEPT_BOOTSTAT_MASK (0x0001)
#define MPI3_IOCFACTS_EXCEPT_BOOTSTAT_PRIMARY (0x0000)
#define MPI3_IOCFACTS_EXCEPT_BOOTSTAT_SECONDARY (0x0001)
diff --git a/drivers/scsi/mpi3mr/mpi/mpi30_transport.h b/drivers/scsi/mpi3mr/mpi/mpi30_transport.h
index fdc3d1968e43..b2ab25a1cfeb 100644
--- a/drivers/scsi/mpi3mr/mpi/mpi30_transport.h
+++ b/drivers/scsi/mpi3mr/mpi/mpi30_transport.h
@@ -18,7 +18,7 @@ union mpi3_version_union {
#define MPI3_VERSION_MAJOR (3)
#define MPI3_VERSION_MINOR (0)
-#define MPI3_VERSION_UNIT (31)
+#define MPI3_VERSION_UNIT (34)
#define MPI3_VERSION_DEV (0)
#define MPI3_DEVHANDLE_INVALID (0xffff)
struct mpi3_sysif_oper_queue_indexes {
@@ -158,6 +158,7 @@ struct mpi3_sysif_registers {
#define MPI3_SYSIF_FAULT_CODE_SOFT_RESET_NEEDED (0x0000f004)
#define MPI3_SYSIF_FAULT_CODE_POWER_CYCLE_REQUIRED (0x0000f005)
#define MPI3_SYSIF_FAULT_CODE_TEMP_THRESHOLD_EXCEEDED (0x0000f006)
+#define MPI3_SYSIF_FAULT_CODE_INSUFFICIENT_PCI_SLOT_POWER (0x0000f007)
#define MPI3_SYSIF_FAULT_INFO0_OFFSET (0x00001c14)
#define MPI3_SYSIF_FAULT_INFO1_OFFSET (0x00001c18)
#define MPI3_SYSIF_FAULT_INFO2_OFFSET (0x00001c1c)
@@ -410,6 +411,7 @@ struct mpi3_default_reply {
#define MPI3_IOCSTATUS_INSUFFICIENT_RESOURCES (0x0006)
#define MPI3_IOCSTATUS_INVALID_FIELD (0x0007)
#define MPI3_IOCSTATUS_INVALID_STATE (0x0008)
+#define MPI3_IOCSTATUS_SHUTDOWN_ACTIVE (0x0009)
#define MPI3_IOCSTATUS_INSUFFICIENT_POWER (0x000a)
#define MPI3_IOCSTATUS_INVALID_CHANGE_COUNT (0x000b)
#define MPI3_IOCSTATUS_ALLOWED_CMD_BLOCK (0x000c)
diff --git a/drivers/scsi/mpi3mr/mpi3mr.h b/drivers/scsi/mpi3mr/mpi3mr.h
index 1dc640de3efc..fcb0fa31536b 100644
--- a/drivers/scsi/mpi3mr/mpi3mr.h
+++ b/drivers/scsi/mpi3mr/mpi3mr.h
@@ -57,8 +57,8 @@ extern struct list_head mrioc_list;
extern int prot_mask;
extern atomic64_t event_counter;
-#define MPI3MR_DRIVER_VERSION "8.10.0.5.50"
-#define MPI3MR_DRIVER_RELDATE "08-Aug-2024"
+#define MPI3MR_DRIVER_VERSION "8.12.0.0.50"
+#define MPI3MR_DRIVER_RELDATE "05-Sept-2024"
#define MPI3MR_DRIVER_NAME "mpi3mr"
#define MPI3MR_DRIVER_LICENSE "GPL"
@@ -178,7 +178,7 @@ extern atomic64_t event_counter;
#define MPI3MR_DEFAULT_SDEV_QD 32
/* Definitions for Threaded IRQ poll*/
-#define MPI3MR_IRQ_POLL_SLEEP 2
+#define MPI3MR_IRQ_POLL_SLEEP 20
#define MPI3MR_IRQ_POLL_TRIGGER_IOCOUNT 8
/* Definitions for the controller security status*/
@@ -1090,6 +1090,7 @@ struct scmd_priv {
* @evtack_cmds_bitmap: Event Ack bitmap
* @delayed_evtack_cmds_list: Delayed event acknowledgment list
* @ts_update_counter: Timestamp update counter
+ * @ts_update_interval: Timestamp update interval
* @reset_in_progress: Reset in progress flag
* @unrecoverable: Controller unrecoverable flag
* @prev_reset_result: Result of previous reset
@@ -1277,7 +1278,8 @@ struct mpi3mr_ioc {
unsigned long *evtack_cmds_bitmap;
struct list_head delayed_evtack_cmds_list;
- u32 ts_update_counter;
+ u16 ts_update_counter;
+ u16 ts_update_interval;
u8 reset_in_progress;
u8 unrecoverable;
int prev_reset_result;
diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c
index 2e1a92d306b2..f1ab76351bd8 100644
--- a/drivers/scsi/mpi3mr/mpi3mr_fw.c
+++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c
@@ -728,7 +728,7 @@ static irqreturn_t mpi3mr_isr_poll(int irq, void *privdata)
mpi3mr_process_op_reply_q(mrioc,
intr_info->op_reply_q);
- usleep_range(MPI3MR_IRQ_POLL_SLEEP, 10 * MPI3MR_IRQ_POLL_SLEEP);
+ usleep_range(MPI3MR_IRQ_POLL_SLEEP, MPI3MR_IRQ_POLL_SLEEP + 1);
} while (atomic_read(&intr_info->op_reply_q->pend_ios) &&
(num_op_reply < mrioc->max_host_ios));
@@ -1362,6 +1362,10 @@ static int mpi3mr_bring_ioc_ready(struct mpi3mr_ioc *mrioc)
int retval = 0;
enum mpi3mr_iocstate ioc_state;
u64 base_info;
+ u8 retry = 0;
+ u64 start_time, elapsed_time_sec;
+
+retry_bring_ioc_ready:
ioc_status = readl(&mrioc->sysif_regs->ioc_status);
ioc_config = readl(&mrioc->sysif_regs->ioc_configuration);
@@ -1380,26 +1384,23 @@ static int mpi3mr_bring_ioc_ready(struct mpi3mr_ioc *mrioc)
ioc_info(mrioc, "controller is in %s state during detection\n",
mpi3mr_iocstate_name(ioc_state));
- if (ioc_state == MRIOC_STATE_BECOMING_READY ||
- ioc_state == MRIOC_STATE_RESET_REQUESTED) {
- timeout = mrioc->ready_timeout * 10;
- do {
- msleep(100);
- } while (--timeout);
+ timeout = mrioc->ready_timeout * 10;
+
+ do {
+ ioc_state = mpi3mr_get_iocstate(mrioc);
+
+ if (ioc_state != MRIOC_STATE_BECOMING_READY &&
+ ioc_state != MRIOC_STATE_RESET_REQUESTED)
+ break;
if (!pci_device_is_present(mrioc->pdev)) {
mrioc->unrecoverable = 1;
- ioc_err(mrioc,
- "controller is not present while waiting to reset\n");
- retval = -1;
+ ioc_err(mrioc, "controller is not present while waiting to reset\n");
goto out_device_not_present;
}
- ioc_state = mpi3mr_get_iocstate(mrioc);
- ioc_info(mrioc,
- "controller is in %s state after waiting to reset\n",
- mpi3mr_iocstate_name(ioc_state));
- }
+ msleep(100);
+ } while (--timeout);
if (ioc_state == MRIOC_STATE_READY) {
ioc_info(mrioc, "issuing message unit reset (MUR) to bring to reset state\n");
@@ -1460,6 +1461,9 @@ static int mpi3mr_bring_ioc_ready(struct mpi3mr_ioc *mrioc)
ioc_config |= MPI3_SYSIF_IOC_CONFIG_ENABLE_IOC;
writel(ioc_config, &mrioc->sysif_regs->ioc_configuration);
+ if (retry == 0)
+ start_time = jiffies;
+
timeout = mrioc->ready_timeout * 10;
do {
ioc_state = mpi3mr_get_iocstate(mrioc);
@@ -1469,6 +1473,12 @@ static int mpi3mr_bring_ioc_ready(struct mpi3mr_ioc *mrioc)
mpi3mr_iocstate_name(ioc_state));
return 0;
}
+ ioc_status = readl(&mrioc->sysif_regs->ioc_status);
+ if ((ioc_status & MPI3_SYSIF_IOC_STATUS_RESET_HISTORY) ||
+ (ioc_status & MPI3_SYSIF_IOC_STATUS_FAULT)) {
+ mpi3mr_print_fault_info(mrioc);
+ goto out_failed;
+ }
if (!pci_device_is_present(mrioc->pdev)) {
mrioc->unrecoverable = 1;
ioc_err(mrioc,
@@ -1477,9 +1487,19 @@ static int mpi3mr_bring_ioc_ready(struct mpi3mr_ioc *mrioc)
goto out_device_not_present;
}
msleep(100);
- } while (--timeout);
+ elapsed_time_sec = jiffies_to_msecs(jiffies - start_time)/1000;
+ } while (elapsed_time_sec < mrioc->ready_timeout);
out_failed:
+ elapsed_time_sec = jiffies_to_msecs(jiffies - start_time)/1000;
+ if ((retry < 2) && (elapsed_time_sec < (mrioc->ready_timeout - 60))) {
+ retry++;
+
+ ioc_warn(mrioc, "retrying to bring IOC ready, retry_count:%d\n"
+ " elapsed time =%llu\n", retry, elapsed_time_sec);
+
+ goto retry_bring_ioc_ready;
+ }
ioc_state = mpi3mr_get_iocstate(mrioc);
ioc_err(mrioc,
"failed to bring to ready state, current state: %s\n",
@@ -2671,7 +2691,7 @@ static void mpi3mr_watchdog_work(struct work_struct *work)
return;
}
- if (mrioc->ts_update_counter++ >= MPI3MR_TSUPDATE_INTERVAL) {
+ if (mrioc->ts_update_counter++ >= mrioc->ts_update_interval) {
mrioc->ts_update_counter = 0;
mpi3mr_sync_timestamp(mrioc);
}
@@ -3845,6 +3865,29 @@ static int mpi3mr_repost_diag_bufs(struct mpi3mr_ioc *mrioc)
}
/**
+ * mpi3mr_read_tsu_interval - Update time stamp interval
+ * @mrioc: Adapter instance reference
+ *
+ * Update time stamp interval if its defined in driver page 1,
+ * otherwise use default value.
+ *
+ * Return: Nothing
+ */
+static void
+mpi3mr_read_tsu_interval(struct mpi3mr_ioc *mrioc)
+{
+ struct mpi3_driver_page1 driver_pg1;
+ u16 pg_sz = sizeof(driver_pg1);
+ int retval = 0;
+
+ mrioc->ts_update_interval = MPI3MR_TSUPDATE_INTERVAL;
+
+ retval = mpi3mr_cfg_get_driver_pg1(mrioc, &driver_pg1, pg_sz);
+ if (!retval && driver_pg1.time_stamp_update)
+ mrioc->ts_update_interval = (driver_pg1.time_stamp_update * 60);
+}
+
+/**
* mpi3mr_print_ioc_info - Display controller information
* @mrioc: Adapter instance reference
*
@@ -4140,6 +4183,7 @@ retry_init:
goto out_failed_noretry;
}
+ mpi3mr_read_tsu_interval(mrioc);
mpi3mr_print_ioc_info(mrioc);
if (!mrioc->cfg_page) {
@@ -4321,6 +4365,7 @@ retry_init:
goto out_failed_noretry;
}
+ mpi3mr_read_tsu_interval(mrioc);
mpi3mr_print_ioc_info(mrioc);
if (is_resume) {
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
index 9a24f7776d64..ed5046593fda 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
@@ -8898,9 +8898,8 @@ _base_check_ioc_facts_changes(struct MPT3SAS_ADAPTER *ioc)
ioc->device_remove_in_progress, pd_handles_sz, GFP_KERNEL);
if (!device_remove_in_progress) {
ioc_info(ioc,
- "Unable to allocate the memory for "
- "device_remove_in_progress of sz: %d\n "
- , pd_handles_sz);
+ "Unable to allocate the memory for device_remove_in_progress of sz: %d\n",
+ pd_handles_sz);
return -ENOMEM;
}
memset(device_remove_in_progress +
diff --git a/drivers/scsi/pm8001/pm8001_init.c b/drivers/scsi/pm8001/pm8001_init.c
index 1e63cb6cd8e3..33e1eba62ca1 100644
--- a/drivers/scsi/pm8001/pm8001_init.c
+++ b/drivers/scsi/pm8001/pm8001_init.c
@@ -100,10 +100,12 @@ static void pm8001_map_queues(struct Scsi_Host *shost)
struct pm8001_hba_info *pm8001_ha = sha->lldd_ha;
struct blk_mq_queue_map *qmap = &shost->tag_set.map[HCTX_TYPE_DEFAULT];
- if (pm8001_ha->number_of_intr > 1)
+ if (pm8001_ha->number_of_intr > 1) {
blk_mq_pci_map_queues(qmap, pm8001_ha->pdev, 1);
+ return;
+ }
- return blk_mq_map_queues(qmap);
+ blk_mq_map_queues(qmap);
}
/*
diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c
index 8fe886dc5e47..a9869cd8c4c0 100644
--- a/drivers/scsi/pm8001/pm80xx_hwi.c
+++ b/drivers/scsi/pm8001/pm80xx_hwi.c
@@ -2037,7 +2037,7 @@ mpi_ssp_completion(struct pm8001_hba_info *pm8001_ha, void *piomb)
atomic_dec(&pm8001_dev->running_req);
break;
}
- pm8001_dbg(pm8001_ha, IO, "scsi_status = 0x%x\n ",
+ pm8001_dbg(pm8001_ha, IO, "scsi_status = 0x%x\n",
psspPayload->ssp_resp_iu.status);
spin_lock_irqsave(&t->task_state_lock, flags);
t->task_state_flags &= ~SAS_TASK_STATE_PENDING;
diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c
index 72a4c6e3d0c8..4c5881917d76 100644
--- a/drivers/scsi/pmcraid.c
+++ b/drivers/scsi/pmcraid.c
@@ -1946,7 +1946,7 @@ static void pmcraid_soft_reset(struct pmcraid_cmd *cmd)
}
iowrite32(doorbell, pinstance->int_regs.host_ioa_interrupt_reg);
- ioread32(pinstance->int_regs.host_ioa_interrupt_reg),
+ ioread32(pinstance->int_regs.host_ioa_interrupt_reg);
int_reg = ioread32(pinstance->int_regs.ioa_host_interrupt_reg);
pmcraid_info("Waiting for IOA to become operational %x:%x\n",
diff --git a/drivers/scsi/qedf/qedf_io.c b/drivers/scsi/qedf/qedf_io.c
index 054a51713d55..fcfc3bed02c6 100644
--- a/drivers/scsi/qedf/qedf_io.c
+++ b/drivers/scsi/qedf/qedf_io.c
@@ -310,7 +310,7 @@ struct qedf_ioreq *qedf_alloc_cmd(struct qedf_rport *fcport, u8 cmd_type)
if (!free_sqes) {
QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO,
- "Returning NULL, free_sqes=%d.\n ",
+ "Returning NULL, free_sqes=%d.\n",
free_sqes);
goto out_failed;
}
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index a9d8a9c62663..d95f417e24c0 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -2760,7 +2760,6 @@ static int resp_mode_sense(struct scsi_cmnd *scp,
else
bd_len = 0;
alloc_len = msense_6 ? cmd[4] : get_unaligned_be16(cmd + 7);
- memset(arr, 0, SDEBUG_MAX_MSENSE_SZ);
if (0x3 == pcontrol) { /* Saving values not supported */
mk_sense_buffer(scp, ILLEGAL_REQUEST, SAVING_PARAMS_UNSUP, 0);
return check_condition_result;
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 76f488ef6a7e..41e2dfa2d67d 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -38,7 +38,6 @@
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
-#include <linux/bio-integrity.h>
#include <linux/hdreg.h>
#include <linux/errno.h>
#include <linux/idr.h>
@@ -3404,7 +3403,7 @@ static void sd_read_block_characteristics(struct scsi_disk *sdkp,
rcu_read_lock();
vpd = rcu_dereference(sdkp->device->vpd_pgb1);
- if (!vpd || vpd->len < 8) {
+ if (!vpd || vpd->len <= 8) {
rcu_read_unlock();
return;
}
@@ -4093,9 +4092,38 @@ static int sd_start_stop_device(struct scsi_disk *sdkp, int start)
{
unsigned char cmd[6] = { START_STOP }; /* START_VALID */
struct scsi_sense_hdr sshdr;
+ struct scsi_failure failure_defs[] = {
+ {
+ /* Power on, reset, or bus device reset occurred */
+ .sense = UNIT_ATTENTION,
+ .asc = 0x29,
+ .ascq = 0,
+ .result = SAM_STAT_CHECK_CONDITION,
+ },
+ {
+ /* Power on occurred */
+ .sense = UNIT_ATTENTION,
+ .asc = 0x29,
+ .ascq = 1,
+ .result = SAM_STAT_CHECK_CONDITION,
+ },
+ {
+ /* SCSI bus reset */
+ .sense = UNIT_ATTENTION,
+ .asc = 0x29,
+ .ascq = 2,
+ .result = SAM_STAT_CHECK_CONDITION,
+ },
+ {}
+ };
+ struct scsi_failures failures = {
+ .total_allowed = 3,
+ .failure_definitions = failure_defs,
+ };
const struct scsi_exec_args exec_args = {
.sshdr = &sshdr,
.req_flags = BLK_MQ_REQ_PM,
+ .failures = &failures,
};
struct scsi_device *sdp = sdkp->device;
int res;
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index baf870a03ecf..f86be197fedd 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1424,7 +1424,6 @@ static const struct file_operations sg_fops = {
.mmap = sg_mmap,
.release = sg_release,
.fasync = sg_fasync,
- .llseek = no_llseek,
};
static const struct class sg_sysfs_class = {
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index 0d8ce1a92168..d50bad3a2ce9 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -834,6 +834,9 @@ static int flush_buffer(struct scsi_tape *STp, int seek_next)
int backspace, result;
struct st_partstat *STps;
+ if (STp->ready != ST_READY)
+ return 0;
+
/*
* If there was a bus reset, block further access
* to this device.
@@ -841,8 +844,6 @@ static int flush_buffer(struct scsi_tape *STp, int seek_next)
if (STp->pos_unknown)
return (-EIO);
- if (STp->ready != ST_READY)
- return 0;
STps = &(STp->ps[STp->partition]);
if (STps->rw == ST_WRITING) /* Writing */
return st_flush_write_buffer(STp);
diff --git a/drivers/scsi/zalon.c b/drivers/scsi/zalon.c
index 22d412cab91d..15602ec862e3 100644
--- a/drivers/scsi/zalon.c
+++ b/drivers/scsi/zalon.c
@@ -139,7 +139,7 @@ zalon_probe(struct parisc_device *dev)
return -ENODEV;
if (request_irq(dev->irq, ncr53c8xx_intr, IRQF_SHARED, "zalon", host)) {
- dev_printk(KERN_ERR, &dev->dev, "irq problem with %d, detaching\n ",
+ dev_printk(KERN_ERR, &dev->dev, "irq problem with %d, detaching\n",
dev->irq);
goto fail;
}
diff --git a/drivers/sh/intc/userimask.c b/drivers/sh/intc/userimask.c
index abe9091827cd..a363f77881d1 100644
--- a/drivers/sh/intc/userimask.c
+++ b/drivers/sh/intc/userimask.c
@@ -32,8 +32,11 @@ store_intc_userimask(struct device *dev,
const char *buf, size_t count)
{
unsigned long level;
+ int ret;
- level = simple_strtoul(buf, NULL, 10);
+ ret = kstrtoul(buf, 10, &level);
+ if (ret != 0)
+ return ret;
/*
* Minimal acceptable IRQ levels are in the 2 - 16 range, but
diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c
index 5539c5d139d4..653f82984216 100644
--- a/drivers/spi/spidev.c
+++ b/drivers/spi/spidev.c
@@ -685,7 +685,6 @@ static const struct file_operations spidev_fops = {
.compat_ioctl = spidev_compat_ioctl,
.open = spidev_open,
.release = spidev_release,
- .llseek = no_llseek,
};
/*-------------------------------------------------------------------------*/
diff --git a/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c b/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c
index 4b4a4d63e61f..cb149bcdd7d5 100644
--- a/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c
+++ b/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c
@@ -564,7 +564,6 @@ static const struct file_operations acpi_thermal_rel_fops = {
.open = acpi_thermal_rel_open,
.release = acpi_thermal_rel_release,
.unlocked_ioctl = acpi_thermal_rel_ioctl,
- .llseek = no_llseek,
};
static struct miscdevice acpi_thermal_rel_misc_device = {
diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index 6bd28a042dff..9771072da177 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -459,7 +459,6 @@ static void tty_show_fdinfo(struct seq_file *m, struct file *file)
}
static const struct file_operations tty_fops = {
- .llseek = no_llseek,
.read_iter = tty_read,
.write_iter = tty_write,
.splice_read = copy_splice_read,
@@ -474,7 +473,6 @@ static const struct file_operations tty_fops = {
};
static const struct file_operations console_fops = {
- .llseek = no_llseek,
.read_iter = tty_read,
.write_iter = redirected_tty_write,
.splice_read = copy_splice_read,
@@ -488,7 +486,6 @@ static const struct file_operations console_fops = {
};
static const struct file_operations hung_up_tty_fops = {
- .llseek = no_llseek,
.read_iter = hung_up_tty_read,
.write_iter = hung_up_tty_write,
.poll = hung_up_tty_poll,
diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c
index c87fdc849c62..ecdfff2456e3 100644
--- a/drivers/ufs/host/ufs-qcom.c
+++ b/drivers/ufs/host/ufs-qcom.c
@@ -93,7 +93,7 @@ static const struct __ufs_qcom_bw_table {
[MODE_HS_RB][UFS_HS_G3][UFS_LANE_2] = { 1492582, 204800 },
[MODE_HS_RB][UFS_HS_G4][UFS_LANE_2] = { 2915200, 409600 },
[MODE_HS_RB][UFS_HS_G5][UFS_LANE_2] = { 5836800, 819200 },
- [MODE_MAX][0][0] = { 7643136, 307200 },
+ [MODE_MAX][0][0] = { 7643136, 819200 },
};
static void ufs_qcom_get_default_testbus_cfg(struct ufs_qcom_host *host);
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 05b52e61a66f..c626bb73ea59 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -722,7 +722,6 @@ static __poll_t ffs_ep0_poll(struct file *file, poll_table *wait)
}
static const struct file_operations ffs_ep0_operations = {
- .llseek = no_llseek,
.open = ffs_ep0_open,
.write = ffs_ep0_write,
@@ -1830,7 +1829,6 @@ static long ffs_epfile_ioctl(struct file *file, unsigned code,
}
static const struct file_operations ffs_epfile_operations = {
- .llseek = no_llseek,
.open = ffs_epfile_open,
.write_iter = ffs_epfile_write_iter,
diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
index 03179b1880fd..9c7381661016 100644
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@@ -705,7 +705,6 @@ static const struct file_operations ep_io_operations = {
.open = ep_open,
.release = ep_release,
- .llseek = no_llseek,
.unlocked_ioctl = ep_ioctl,
.read_iter = ep_read_iter,
.write_iter = ep_write_iter,
@@ -1939,7 +1938,6 @@ gadget_dev_open (struct inode *inode, struct file *fd)
}
static const struct file_operations ep0_operations = {
- .llseek = no_llseek,
.open = gadget_dev_open,
.read = ep0_read,
diff --git a/drivers/usb/gadget/legacy/raw_gadget.c b/drivers/usb/gadget/legacy/raw_gadget.c
index 399fca32a8ac..112fd18d8c99 100644
--- a/drivers/usb/gadget/legacy/raw_gadget.c
+++ b/drivers/usb/gadget/legacy/raw_gadget.c
@@ -1364,7 +1364,6 @@ static const struct file_operations raw_fops = {
.unlocked_ioctl = raw_ioctl,
.compat_ioctl = raw_ioctl,
.release = raw_release,
- .llseek = no_llseek,
};
static struct miscdevice raw_misc_device = {
diff --git a/drivers/usb/gadget/udc/atmel_usba_udc.c b/drivers/usb/gadget/udc/atmel_usba_udc.c
index b76885d78e8a..4928eba19327 100644
--- a/drivers/usb/gadget/udc/atmel_usba_udc.c
+++ b/drivers/usb/gadget/udc/atmel_usba_udc.c
@@ -187,7 +187,6 @@ static int regs_dbg_release(struct inode *inode, struct file *file)
static const struct file_operations queue_dbg_fops = {
.owner = THIS_MODULE,
.open = queue_dbg_open,
- .llseek = no_llseek,
.read = queue_dbg_read,
.release = queue_dbg_release,
};
diff --git a/drivers/usb/misc/ldusb.c b/drivers/usb/misc/ldusb.c
index 7cbef74dfc9a..f392d6f84df9 100644
--- a/drivers/usb/misc/ldusb.c
+++ b/drivers/usb/misc/ldusb.c
@@ -627,7 +627,6 @@ static const struct file_operations ld_usb_fops = {
.open = ld_usb_open,
.release = ld_usb_release,
.poll = ld_usb_poll,
- .llseek = no_llseek,
};
/*
diff --git a/drivers/usb/mon/mon_bin.c b/drivers/usb/mon/mon_bin.c
index 4e30de4db1c0..afb71c18415d 100644
--- a/drivers/usb/mon/mon_bin.c
+++ b/drivers/usb/mon/mon_bin.c
@@ -1289,7 +1289,6 @@ static int mon_bin_mmap(struct file *filp, struct vm_area_struct *vma)
static const struct file_operations mon_fops_binary = {
.owner = THIS_MODULE,
.open = mon_bin_open,
- .llseek = no_llseek,
.read = mon_bin_read,
/* .write = mon_text_write, */
.poll = mon_bin_poll,
diff --git a/drivers/usb/mon/mon_stat.c b/drivers/usb/mon/mon_stat.c
index 3c23805ab1a4..398e02af6a2b 100644
--- a/drivers/usb/mon/mon_stat.c
+++ b/drivers/usb/mon/mon_stat.c
@@ -62,7 +62,6 @@ static int mon_stat_release(struct inode *inode, struct file *file)
const struct file_operations mon_fops_stat = {
.owner = THIS_MODULE,
.open = mon_stat_open,
- .llseek = no_llseek,
.read = mon_stat_read,
/* .write = mon_stat_write, */
/* .poll = mon_stat_poll, */
diff --git a/drivers/usb/mon/mon_text.c b/drivers/usb/mon/mon_text.c
index 2fe9b95bac1d..68b9b2b41189 100644
--- a/drivers/usb/mon/mon_text.c
+++ b/drivers/usb/mon/mon_text.c
@@ -685,7 +685,6 @@ static int mon_text_release(struct inode *inode, struct file *file)
static const struct file_operations mon_fops_text_t = {
.owner = THIS_MODULE,
.open = mon_text_open,
- .llseek = no_llseek,
.read = mon_text_read_t,
.release = mon_text_release,
};
@@ -693,7 +692,6 @@ static const struct file_operations mon_fops_text_t = {
static const struct file_operations mon_fops_text_u = {
.owner = THIS_MODULE,
.open = mon_text_open,
- .llseek = no_llseek,
.read = mon_text_read_u,
.release = mon_text_release,
};
diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
index 9a3e97108ace..0d632ba5d2a3 100644
--- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
@@ -723,7 +723,6 @@ static const struct file_operations hisi_acc_vf_resume_fops = {
.owner = THIS_MODULE,
.write = hisi_acc_vf_resume_write,
.release = hisi_acc_vf_release_file,
- .llseek = no_llseek,
};
static struct hisi_acc_vf_migration_file *
@@ -845,7 +844,6 @@ static const struct file_operations hisi_acc_vf_save_fops = {
.unlocked_ioctl = hisi_acc_vf_precopy_ioctl,
.compat_ioctl = compat_ptr_ioctl,
.release = hisi_acc_vf_release_file,
- .llseek = no_llseek,
};
static struct hisi_acc_vf_migration_file *
diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c
index 61d9b0f9146d..242c23eef452 100644
--- a/drivers/vfio/pci/mlx5/main.c
+++ b/drivers/vfio/pci/mlx5/main.c
@@ -587,7 +587,6 @@ static const struct file_operations mlx5vf_save_fops = {
.unlocked_ioctl = mlx5vf_precopy_ioctl,
.compat_ioctl = compat_ptr_ioctl,
.release = mlx5vf_release_file,
- .llseek = no_llseek,
};
static int mlx5vf_pci_save_device_inc_data(struct mlx5vf_pci_core_device *mvdev)
@@ -1000,7 +999,6 @@ static const struct file_operations mlx5vf_resume_fops = {
.owner = THIS_MODULE,
.write = mlx5vf_resume_write,
.release = mlx5vf_release_file,
- .llseek = no_llseek,
};
static struct mlx5_vf_migration_file *
diff --git a/drivers/vfio/pci/pds/lm.c b/drivers/vfio/pci/pds/lm.c
index 6b94cc0bf45b..f2673d395236 100644
--- a/drivers/vfio/pci/pds/lm.c
+++ b/drivers/vfio/pci/pds/lm.c
@@ -235,7 +235,6 @@ static const struct file_operations pds_vfio_save_fops = {
.owner = THIS_MODULE,
.read = pds_vfio_save_read,
.release = pds_vfio_release_file,
- .llseek = no_llseek,
};
static int pds_vfio_get_save_file(struct pds_vfio_pci_device *pds_vfio)
@@ -334,7 +333,6 @@ static const struct file_operations pds_vfio_restore_fops = {
.owner = THIS_MODULE,
.write = pds_vfio_restore_write,
.release = pds_vfio_release_file,
- .llseek = no_llseek,
};
static int pds_vfio_get_restore_file(struct pds_vfio_pci_device *pds_vfio)
diff --git a/drivers/vfio/pci/qat/main.c b/drivers/vfio/pci/qat/main.c
index e36740a282e7..be3644ced17b 100644
--- a/drivers/vfio/pci/qat/main.c
+++ b/drivers/vfio/pci/qat/main.c
@@ -220,7 +220,6 @@ static const struct file_operations qat_vf_save_fops = {
.unlocked_ioctl = qat_vf_precopy_ioctl,
.compat_ioctl = compat_ptr_ioctl,
.release = qat_vf_release_file,
- .llseek = no_llseek,
};
static int qat_vf_save_state(struct qat_vf_core_device *qat_vdev,
@@ -345,7 +344,6 @@ static const struct file_operations qat_vf_resume_fops = {
.owner = THIS_MODULE,
.write = qat_vf_resume_write,
.release = qat_vf_release_file,
- .llseek = no_llseek,
};
static struct qat_vf_migration_file *
diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c
index 2e093535884b..e8b4e8c119b5 100644
--- a/drivers/video/fbdev/core/fbcon.c
+++ b/drivers/video/fbdev/core/fbcon.c
@@ -512,8 +512,10 @@ static int search_fb_in_map(int idx)
int i, retval = 0;
for (i = first_fb_vc; i <= last_fb_vc; i++) {
- if (con2fb_map[i] == idx)
+ if (con2fb_map[i] == idx) {
retval = 1;
+ break;
+ }
}
return retval;
}
@@ -523,8 +525,10 @@ static int search_for_mapped_con(void)
int i, retval = 0;
for (i = first_fb_vc; i <= last_fb_vc; i++) {
- if (con2fb_map[i] != -1)
+ if (con2fb_map[i] != -1) {
retval = 1;
+ break;
+ }
}
return retval;
}
@@ -861,6 +865,8 @@ static int set_con2fb_map(int unit, int newidx, int user)
return err;
fbcon_add_cursor_work(info);
+ } else if (vc) {
+ set_blitting_type(vc, info);
}
con2fb_map[unit] = newidx;
diff --git a/drivers/video/fbdev/omap2/omapfb/dss/dss-of.c b/drivers/video/fbdev/omap2/omapfb/dss/dss-of.c
index 4040e247e026..d5a43b3bf45e 100644
--- a/drivers/video/fbdev/omap2/omapfb/dss/dss-of.c
+++ b/drivers/video/fbdev/omap2/omapfb/dss/dss-of.c
@@ -129,12 +129,9 @@ omapdss_of_find_source_for_first_ep(struct device_node *node)
return ERR_PTR(-EINVAL);
src_port = of_graph_get_remote_port(ep);
- if (!src_port) {
- of_node_put(ep);
- return ERR_PTR(-EINVAL);
- }
-
of_node_put(ep);
+ if (!src_port)
+ return ERR_PTR(-EINVAL);
src = omap_dss_find_output_by_port_node(src_port);
diff --git a/drivers/video/fbdev/sis/sis_main.c b/drivers/video/fbdev/sis/sis_main.c
index 009bf1d92644..75033e6be15a 100644
--- a/drivers/video/fbdev/sis/sis_main.c
+++ b/drivers/video/fbdev/sis/sis_main.c
@@ -183,7 +183,7 @@ static void sisfb_search_mode(char *name, bool quiet)
{
unsigned int j = 0, xres = 0, yres = 0, depth = 0, rate = 0;
int i = 0;
- char strbuf[16], strbuf1[20];
+ char strbuf[24], strbuf1[20];
char *nameptr = name;
/* We don't know the hardware specs yet and there is no ivideo */
diff --git a/drivers/virt/coco/tdx-guest/tdx-guest.c b/drivers/virt/coco/tdx-guest/tdx-guest.c
index 2acba56ad42e..d7db6c824e13 100644
--- a/drivers/virt/coco/tdx-guest/tdx-guest.c
+++ b/drivers/virt/coco/tdx-guest/tdx-guest.c
@@ -285,7 +285,6 @@ static long tdx_guest_ioctl(struct file *file, unsigned int cmd,
static const struct file_operations tdx_guest_fops = {
.owner = THIS_MODULE,
.unlocked_ioctl = tdx_guest_ioctl,
- .llseek = no_llseek,
};
static struct miscdevice tdx_misc_dev = {
diff --git a/drivers/watchdog/acquirewdt.c b/drivers/watchdog/acquirewdt.c
index 53b04abd55b0..08ca18e91124 100644
--- a/drivers/watchdog/acquirewdt.c
+++ b/drivers/watchdog/acquirewdt.c
@@ -218,7 +218,6 @@ static int acq_close(struct inode *inode, struct file *file)
static const struct file_operations acq_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.write = acq_write,
.unlocked_ioctl = acq_ioctl,
.compat_ioctl = compat_ptr_ioctl,
diff --git a/drivers/watchdog/advantechwdt.c b/drivers/watchdog/advantechwdt.c
index 7a0acbc3e4dd..e41cd3ba4e0e 100644
--- a/drivers/watchdog/advantechwdt.c
+++ b/drivers/watchdog/advantechwdt.c
@@ -217,7 +217,6 @@ static int advwdt_close(struct inode *inode, struct file *file)
static const struct file_operations advwdt_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.write = advwdt_write,
.unlocked_ioctl = advwdt_ioctl,
.compat_ioctl = compat_ptr_ioctl,
diff --git a/drivers/watchdog/alim1535_wdt.c b/drivers/watchdog/alim1535_wdt.c
index bfb9a91ca1df..1ecbd1ac5c3a 100644
--- a/drivers/watchdog/alim1535_wdt.c
+++ b/drivers/watchdog/alim1535_wdt.c
@@ -359,7 +359,6 @@ static int __init ali_find_watchdog(void)
static const struct file_operations ali_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.write = ali_write,
.unlocked_ioctl = ali_ioctl,
.compat_ioctl = compat_ptr_ioctl,
diff --git a/drivers/watchdog/alim7101_wdt.c b/drivers/watchdog/alim7101_wdt.c
index 4ff7f5afb7aa..9c7cf939ba3d 100644
--- a/drivers/watchdog/alim7101_wdt.c
+++ b/drivers/watchdog/alim7101_wdt.c
@@ -289,7 +289,6 @@ static long fop_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
static const struct file_operations wdt_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.write = fop_write,
.open = fop_open,
.release = fop_close,
diff --git a/drivers/watchdog/at91rm9200_wdt.c b/drivers/watchdog/at91rm9200_wdt.c
index 558015f08c7a..17382512a609 100644
--- a/drivers/watchdog/at91rm9200_wdt.c
+++ b/drivers/watchdog/at91rm9200_wdt.c
@@ -210,7 +210,6 @@ static ssize_t at91_wdt_write(struct file *file, const char *data,
static const struct file_operations at91wdt_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.unlocked_ioctl = at91_wdt_ioctl,
.compat_ioctl = compat_ptr_ioctl,
.open = at91_wdt_open,
diff --git a/drivers/watchdog/ath79_wdt.c b/drivers/watchdog/ath79_wdt.c
index e5cc30622b12..d16b2c583fa4 100644
--- a/drivers/watchdog/ath79_wdt.c
+++ b/drivers/watchdog/ath79_wdt.c
@@ -231,7 +231,6 @@ static long ath79_wdt_ioctl(struct file *file, unsigned int cmd,
static const struct file_operations ath79_wdt_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.write = ath79_wdt_write,
.unlocked_ioctl = ath79_wdt_ioctl,
.compat_ioctl = compat_ptr_ioctl,
diff --git a/drivers/watchdog/cpu5wdt.c b/drivers/watchdog/cpu5wdt.c
index 9f279c0e13a6..f94b84048612 100644
--- a/drivers/watchdog/cpu5wdt.c
+++ b/drivers/watchdog/cpu5wdt.c
@@ -185,7 +185,6 @@ static ssize_t cpu5wdt_write(struct file *file, const char __user *buf,
static const struct file_operations cpu5wdt_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.unlocked_ioctl = cpu5wdt_ioctl,
.compat_ioctl = compat_ptr_ioctl,
.open = cpu5wdt_open,
diff --git a/drivers/watchdog/cpwd.c b/drivers/watchdog/cpwd.c
index 901b94d456db..8ee81f018dda 100644
--- a/drivers/watchdog/cpwd.c
+++ b/drivers/watchdog/cpwd.c
@@ -507,7 +507,6 @@ static const struct file_operations cpwd_fops = {
.write = cpwd_write,
.read = cpwd_read,
.release = cpwd_release,
- .llseek = no_llseek,
};
static int cpwd_probe(struct platform_device *op)
diff --git a/drivers/watchdog/eurotechwdt.c b/drivers/watchdog/eurotechwdt.c
index e26609ad4c17..10c647b1226a 100644
--- a/drivers/watchdog/eurotechwdt.c
+++ b/drivers/watchdog/eurotechwdt.c
@@ -368,7 +368,6 @@ static int eurwdt_notify_sys(struct notifier_block *this, unsigned long code,
static const struct file_operations eurwdt_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.write = eurwdt_write,
.unlocked_ioctl = eurwdt_ioctl,
.compat_ioctl = compat_ptr_ioctl,
diff --git a/drivers/watchdog/gef_wdt.c b/drivers/watchdog/gef_wdt.c
index 6a1db1c783fa..d854fcfbfa5b 100644
--- a/drivers/watchdog/gef_wdt.c
+++ b/drivers/watchdog/gef_wdt.c
@@ -245,7 +245,6 @@ static int gef_wdt_release(struct inode *inode, struct file *file)
static const struct file_operations gef_wdt_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.write = gef_wdt_write,
.unlocked_ioctl = gef_wdt_ioctl,
.compat_ioctl = compat_ptr_ioctl,
diff --git a/drivers/watchdog/geodewdt.c b/drivers/watchdog/geodewdt.c
index 5186c37ad451..4ed6d139320b 100644
--- a/drivers/watchdog/geodewdt.c
+++ b/drivers/watchdog/geodewdt.c
@@ -196,7 +196,6 @@ static long geodewdt_ioctl(struct file *file, unsigned int cmd,
static const struct file_operations geodewdt_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.write = geodewdt_write,
.unlocked_ioctl = geodewdt_ioctl,
.compat_ioctl = compat_ptr_ioctl,
diff --git a/drivers/watchdog/ib700wdt.c b/drivers/watchdog/ib700wdt.c
index 39ea97009abd..b041ad90a62c 100644
--- a/drivers/watchdog/ib700wdt.c
+++ b/drivers/watchdog/ib700wdt.c
@@ -256,7 +256,6 @@ static int ibwdt_close(struct inode *inode, struct file *file)
static const struct file_operations ibwdt_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.write = ibwdt_write,
.unlocked_ioctl = ibwdt_ioctl,
.compat_ioctl = compat_ptr_ioctl,
diff --git a/drivers/watchdog/ibmasr.c b/drivers/watchdog/ibmasr.c
index 6955c693b5fd..cf845f865945 100644
--- a/drivers/watchdog/ibmasr.c
+++ b/drivers/watchdog/ibmasr.c
@@ -340,7 +340,6 @@ static int asr_release(struct inode *inode, struct file *file)
static const struct file_operations asr_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.write = asr_write,
.unlocked_ioctl = asr_ioctl,
.compat_ioctl = compat_ptr_ioctl,
diff --git a/drivers/watchdog/indydog.c b/drivers/watchdog/indydog.c
index 9857bb74a723..d3092d261345 100644
--- a/drivers/watchdog/indydog.c
+++ b/drivers/watchdog/indydog.c
@@ -149,7 +149,6 @@ static int indydog_notify_sys(struct notifier_block *this,
static const struct file_operations indydog_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.write = indydog_write,
.unlocked_ioctl = indydog_ioctl,
.compat_ioctl = compat_ptr_ioctl,
diff --git a/drivers/watchdog/it8712f_wdt.c b/drivers/watchdog/it8712f_wdt.c
index 3ce6a58bd81e..b776e6766c9d 100644
--- a/drivers/watchdog/it8712f_wdt.c
+++ b/drivers/watchdog/it8712f_wdt.c
@@ -341,7 +341,6 @@ static int it8712f_wdt_release(struct inode *inode, struct file *file)
static const struct file_operations it8712f_wdt_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.write = it8712f_wdt_write,
.unlocked_ioctl = it8712f_wdt_ioctl,
.compat_ioctl = compat_ptr_ioctl,
diff --git a/drivers/watchdog/m54xx_wdt.c b/drivers/watchdog/m54xx_wdt.c
index 062ea3e6497e..26bd073bd375 100644
--- a/drivers/watchdog/m54xx_wdt.c
+++ b/drivers/watchdog/m54xx_wdt.c
@@ -179,7 +179,6 @@ static int m54xx_wdt_release(struct inode *inode, struct file *file)
static const struct file_operations m54xx_wdt_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.write = m54xx_wdt_write,
.unlocked_ioctl = m54xx_wdt_ioctl,
.compat_ioctl = compat_ptr_ioctl,
diff --git a/drivers/watchdog/machzwd.c b/drivers/watchdog/machzwd.c
index 73f2221f6222..73d641486909 100644
--- a/drivers/watchdog/machzwd.c
+++ b/drivers/watchdog/machzwd.c
@@ -359,7 +359,6 @@ static int zf_notify_sys(struct notifier_block *this, unsigned long code,
static const struct file_operations zf_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.write = zf_write,
.unlocked_ioctl = zf_ioctl,
.compat_ioctl = compat_ptr_ioctl,
diff --git a/drivers/watchdog/mixcomwd.c b/drivers/watchdog/mixcomwd.c
index d387bad377c4..70d9cf84c342 100644
--- a/drivers/watchdog/mixcomwd.c
+++ b/drivers/watchdog/mixcomwd.c
@@ -224,7 +224,6 @@ static long mixcomwd_ioctl(struct file *file,
static const struct file_operations mixcomwd_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.write = mixcomwd_write,
.unlocked_ioctl = mixcomwd_ioctl,
.compat_ioctl = compat_ptr_ioctl,
diff --git a/drivers/watchdog/mtx-1_wdt.c b/drivers/watchdog/mtx-1_wdt.c
index 06756135033d..11f05024a181 100644
--- a/drivers/watchdog/mtx-1_wdt.c
+++ b/drivers/watchdog/mtx-1_wdt.c
@@ -177,7 +177,6 @@ static ssize_t mtx1_wdt_write(struct file *file, const char *buf,
static const struct file_operations mtx1_wdt_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.unlocked_ioctl = mtx1_wdt_ioctl,
.compat_ioctl = compat_ptr_ioctl,
.open = mtx1_wdt_open,
diff --git a/drivers/watchdog/nv_tco.c b/drivers/watchdog/nv_tco.c
index ac4a9c16341d..f8eb1f65a59e 100644
--- a/drivers/watchdog/nv_tco.c
+++ b/drivers/watchdog/nv_tco.c
@@ -264,7 +264,6 @@ static long nv_tco_ioctl(struct file *file, unsigned int cmd,
static const struct file_operations nv_tco_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.write = nv_tco_write,
.unlocked_ioctl = nv_tco_ioctl,
.compat_ioctl = compat_ptr_ioctl,
diff --git a/drivers/watchdog/pc87413_wdt.c b/drivers/watchdog/pc87413_wdt.c
index c7f745caf203..fbf835d112b8 100644
--- a/drivers/watchdog/pc87413_wdt.c
+++ b/drivers/watchdog/pc87413_wdt.c
@@ -470,7 +470,6 @@ static int pc87413_notify_sys(struct notifier_block *this,
static const struct file_operations pc87413_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.write = pc87413_write,
.unlocked_ioctl = pc87413_ioctl,
.compat_ioctl = compat_ptr_ioctl,
diff --git a/drivers/watchdog/pcwd.c b/drivers/watchdog/pcwd.c
index a793b03a785d..1a4282235aac 100644
--- a/drivers/watchdog/pcwd.c
+++ b/drivers/watchdog/pcwd.c
@@ -749,7 +749,6 @@ static int pcwd_temp_close(struct inode *inode, struct file *file)
static const struct file_operations pcwd_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.write = pcwd_write,
.unlocked_ioctl = pcwd_ioctl,
.compat_ioctl = compat_ptr_ioctl,
@@ -765,7 +764,6 @@ static struct miscdevice pcwd_miscdev = {
static const struct file_operations pcwd_temp_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.read = pcwd_temp_read,
.open = pcwd_temp_open,
.release = pcwd_temp_close,
diff --git a/drivers/watchdog/pcwd_pci.c b/drivers/watchdog/pcwd_pci.c
index 54d86fcb1837..a489b426f2ba 100644
--- a/drivers/watchdog/pcwd_pci.c
+++ b/drivers/watchdog/pcwd_pci.c
@@ -643,7 +643,6 @@ static int pcipcwd_notify_sys(struct notifier_block *this, unsigned long code,
static const struct file_operations pcipcwd_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.write = pcipcwd_write,
.unlocked_ioctl = pcipcwd_ioctl,
.compat_ioctl = compat_ptr_ioctl,
@@ -659,7 +658,6 @@ static struct miscdevice pcipcwd_miscdev = {
static const struct file_operations pcipcwd_temp_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.read = pcipcwd_temp_read,
.open = pcipcwd_temp_open,
.release = pcipcwd_temp_release,
diff --git a/drivers/watchdog/pcwd_usb.c b/drivers/watchdog/pcwd_usb.c
index 8202f0a6b093..132699e2f247 100644
--- a/drivers/watchdog/pcwd_usb.c
+++ b/drivers/watchdog/pcwd_usb.c
@@ -549,7 +549,6 @@ static int usb_pcwd_notify_sys(struct notifier_block *this, unsigned long code,
static const struct file_operations usb_pcwd_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.write = usb_pcwd_write,
.unlocked_ioctl = usb_pcwd_ioctl,
.compat_ioctl = compat_ptr_ioctl,
@@ -565,7 +564,6 @@ static struct miscdevice usb_pcwd_miscdev = {
static const struct file_operations usb_pcwd_temperature_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.read = usb_pcwd_temperature_read,
.open = usb_pcwd_temperature_open,
.release = usb_pcwd_temperature_release,
diff --git a/drivers/watchdog/pika_wdt.c b/drivers/watchdog/pika_wdt.c
index 782b8c23d99c..393aa4b1bc13 100644
--- a/drivers/watchdog/pika_wdt.c
+++ b/drivers/watchdog/pika_wdt.c
@@ -209,7 +209,6 @@ static long pikawdt_ioctl(struct file *file,
static const struct file_operations pikawdt_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.open = pikawdt_open,
.release = pikawdt_release,
.write = pikawdt_write,
diff --git a/drivers/watchdog/rc32434_wdt.c b/drivers/watchdog/rc32434_wdt.c
index 417f9b75679c..efadbb9d7ce7 100644
--- a/drivers/watchdog/rc32434_wdt.c
+++ b/drivers/watchdog/rc32434_wdt.c
@@ -242,7 +242,6 @@ static long rc32434_wdt_ioctl(struct file *file, unsigned int cmd,
static const struct file_operations rc32434_wdt_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.write = rc32434_wdt_write,
.unlocked_ioctl = rc32434_wdt_ioctl,
.compat_ioctl = compat_ptr_ioctl,
diff --git a/drivers/watchdog/rdc321x_wdt.c b/drivers/watchdog/rdc321x_wdt.c
index 6176f4343fc5..80490316a27f 100644
--- a/drivers/watchdog/rdc321x_wdt.c
+++ b/drivers/watchdog/rdc321x_wdt.c
@@ -197,7 +197,6 @@ static ssize_t rdc321x_wdt_write(struct file *file, const char __user *buf,
static const struct file_operations rdc321x_wdt_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.unlocked_ioctl = rdc321x_wdt_ioctl,
.compat_ioctl = compat_ptr_ioctl,
.open = rdc321x_wdt_open,
diff --git a/drivers/watchdog/riowd.c b/drivers/watchdog/riowd.c
index b293792a292a..f47d90d01c19 100644
--- a/drivers/watchdog/riowd.c
+++ b/drivers/watchdog/riowd.c
@@ -160,7 +160,6 @@ static ssize_t riowd_write(struct file *file, const char __user *buf,
static const struct file_operations riowd_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.unlocked_ioctl = riowd_ioctl,
.compat_ioctl = compat_ptr_ioctl,
.open = riowd_open,
diff --git a/drivers/watchdog/sa1100_wdt.c b/drivers/watchdog/sa1100_wdt.c
index 34a917221e31..6e91ee3fbfb5 100644
--- a/drivers/watchdog/sa1100_wdt.c
+++ b/drivers/watchdog/sa1100_wdt.c
@@ -164,7 +164,6 @@ static long sa1100dog_ioctl(struct file *file, unsigned int cmd,
static const struct file_operations sa1100dog_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.write = sa1100dog_write,
.unlocked_ioctl = sa1100dog_ioctl,
.compat_ioctl = compat_ptr_ioctl,
diff --git a/drivers/watchdog/sb_wdog.c b/drivers/watchdog/sb_wdog.c
index 504be461f992..eaa68b54cf56 100644
--- a/drivers/watchdog/sb_wdog.c
+++ b/drivers/watchdog/sb_wdog.c
@@ -234,7 +234,6 @@ static int sbwdog_notify_sys(struct notifier_block *this, unsigned long code,
static const struct file_operations sbwdog_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.write = sbwdog_write,
.unlocked_ioctl = sbwdog_ioctl,
.compat_ioctl = compat_ptr_ioctl,
diff --git a/drivers/watchdog/sbc60xxwdt.c b/drivers/watchdog/sbc60xxwdt.c
index 7b974802dfc7..e9bf12918ed8 100644
--- a/drivers/watchdog/sbc60xxwdt.c
+++ b/drivers/watchdog/sbc60xxwdt.c
@@ -275,7 +275,6 @@ static long fop_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
static const struct file_operations wdt_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.write = fop_write,
.open = fop_open,
.release = fop_close,
diff --git a/drivers/watchdog/sbc7240_wdt.c b/drivers/watchdog/sbc7240_wdt.c
index d640b26e18a6..21a1f0b32070 100644
--- a/drivers/watchdog/sbc7240_wdt.c
+++ b/drivers/watchdog/sbc7240_wdt.c
@@ -205,7 +205,6 @@ static long fop_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
static const struct file_operations wdt_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.write = fop_write,
.open = fop_open,
.release = fop_close,
diff --git a/drivers/watchdog/sbc8360.c b/drivers/watchdog/sbc8360.c
index 4f8b9912fc51..a9fd1615b4c3 100644
--- a/drivers/watchdog/sbc8360.c
+++ b/drivers/watchdog/sbc8360.c
@@ -301,7 +301,6 @@ static int sbc8360_notify_sys(struct notifier_block *this, unsigned long code,
static const struct file_operations sbc8360_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.write = sbc8360_write,
.open = sbc8360_open,
.release = sbc8360_close,
diff --git a/drivers/watchdog/sbc_epx_c3.c b/drivers/watchdog/sbc_epx_c3.c
index 5e3a9ddb952e..1d291dc0a4a6 100644
--- a/drivers/watchdog/sbc_epx_c3.c
+++ b/drivers/watchdog/sbc_epx_c3.c
@@ -153,7 +153,6 @@ static int epx_c3_notify_sys(struct notifier_block *this, unsigned long code,
static const struct file_operations epx_c3_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.write = epx_c3_write,
.unlocked_ioctl = epx_c3_ioctl,
.compat_ioctl = compat_ptr_ioctl,
diff --git a/drivers/watchdog/sbc_fitpc2_wdt.c b/drivers/watchdog/sbc_fitpc2_wdt.c
index b8eb8d5ca1af..ff9e44825423 100644
--- a/drivers/watchdog/sbc_fitpc2_wdt.c
+++ b/drivers/watchdog/sbc_fitpc2_wdt.c
@@ -181,7 +181,6 @@ static int fitpc2_wdt_release(struct inode *inode, struct file *file)
static const struct file_operations fitpc2_wdt_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.write = fitpc2_wdt_write,
.unlocked_ioctl = fitpc2_wdt_ioctl,
.compat_ioctl = compat_ptr_ioctl,
diff --git a/drivers/watchdog/sc1200wdt.c b/drivers/watchdog/sc1200wdt.c
index f22ebe89fe13..76a58715f665 100644
--- a/drivers/watchdog/sc1200wdt.c
+++ b/drivers/watchdog/sc1200wdt.c
@@ -304,7 +304,6 @@ static struct notifier_block sc1200wdt_notifier = {
static const struct file_operations sc1200wdt_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.write = sc1200wdt_write,
.unlocked_ioctl = sc1200wdt_ioctl,
.compat_ioctl = compat_ptr_ioctl,
diff --git a/drivers/watchdog/sc520_wdt.c b/drivers/watchdog/sc520_wdt.c
index ca65468f4b9c..e849e1af267b 100644
--- a/drivers/watchdog/sc520_wdt.c
+++ b/drivers/watchdog/sc520_wdt.c
@@ -331,7 +331,6 @@ static long fop_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
static const struct file_operations wdt_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.write = fop_write,
.open = fop_open,
.release = fop_close,
diff --git a/drivers/watchdog/sch311x_wdt.c b/drivers/watchdog/sch311x_wdt.c
index 409d49880170..76053158d259 100644
--- a/drivers/watchdog/sch311x_wdt.c
+++ b/drivers/watchdog/sch311x_wdt.c
@@ -334,7 +334,6 @@ static int sch311x_wdt_close(struct inode *inode, struct file *file)
static const struct file_operations sch311x_wdt_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.write = sch311x_wdt_write,
.unlocked_ioctl = sch311x_wdt_ioctl,
.compat_ioctl = compat_ptr_ioctl,
diff --git a/drivers/watchdog/scx200_wdt.c b/drivers/watchdog/scx200_wdt.c
index 7b5e18323f3f..4dd8549e3674 100644
--- a/drivers/watchdog/scx200_wdt.c
+++ b/drivers/watchdog/scx200_wdt.c
@@ -198,7 +198,6 @@ static long scx200_wdt_ioctl(struct file *file, unsigned int cmd,
static const struct file_operations scx200_wdt_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.write = scx200_wdt_write,
.unlocked_ioctl = scx200_wdt_ioctl,
.compat_ioctl = compat_ptr_ioctl,
diff --git a/drivers/watchdog/smsc37b787_wdt.c b/drivers/watchdog/smsc37b787_wdt.c
index 7463df479d11..97ca500ec8a8 100644
--- a/drivers/watchdog/smsc37b787_wdt.c
+++ b/drivers/watchdog/smsc37b787_wdt.c
@@ -502,7 +502,6 @@ static int wb_smsc_wdt_notify_sys(struct notifier_block *this,
static const struct file_operations wb_smsc_wdt_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.write = wb_smsc_wdt_write,
.unlocked_ioctl = wb_smsc_wdt_ioctl,
.compat_ioctl = compat_ptr_ioctl,
diff --git a/drivers/watchdog/w83877f_wdt.c b/drivers/watchdog/w83877f_wdt.c
index f2650863fd02..1937084c182c 100644
--- a/drivers/watchdog/w83877f_wdt.c
+++ b/drivers/watchdog/w83877f_wdt.c
@@ -299,7 +299,6 @@ static long fop_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
static const struct file_operations wdt_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.write = fop_write,
.open = fop_open,
.release = fop_close,
diff --git a/drivers/watchdog/w83977f_wdt.c b/drivers/watchdog/w83977f_wdt.c
index 31bf21ceaf48..3776030fa7c6 100644
--- a/drivers/watchdog/w83977f_wdt.c
+++ b/drivers/watchdog/w83977f_wdt.c
@@ -443,7 +443,6 @@ static int wdt_notify_sys(struct notifier_block *this, unsigned long code,
static const struct file_operations wdt_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.write = wdt_write,
.unlocked_ioctl = wdt_ioctl,
.compat_ioctl = compat_ptr_ioctl,
diff --git a/drivers/watchdog/wafer5823wdt.c b/drivers/watchdog/wafer5823wdt.c
index a8a1ed215e1e..291109349e73 100644
--- a/drivers/watchdog/wafer5823wdt.c
+++ b/drivers/watchdog/wafer5823wdt.c
@@ -227,7 +227,6 @@ static int wafwdt_notify_sys(struct notifier_block *this, unsigned long code,
static const struct file_operations wafwdt_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.write = wafwdt_write,
.unlocked_ioctl = wafwdt_ioctl,
.compat_ioctl = compat_ptr_ioctl,
diff --git a/drivers/watchdog/wdrtas.c b/drivers/watchdog/wdrtas.c
index c00627825de8..d4fe0bc82211 100644
--- a/drivers/watchdog/wdrtas.c
+++ b/drivers/watchdog/wdrtas.c
@@ -469,7 +469,6 @@ static int wdrtas_reboot(struct notifier_block *this,
static const struct file_operations wdrtas_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.write = wdrtas_write,
.unlocked_ioctl = wdrtas_ioctl,
.compat_ioctl = compat_ptr_ioctl,
@@ -485,7 +484,6 @@ static struct miscdevice wdrtas_miscdev = {
static const struct file_operations wdrtas_temp_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.read = wdrtas_temp_read,
.open = wdrtas_temp_open,
.release = wdrtas_temp_close,
diff --git a/drivers/watchdog/wdt.c b/drivers/watchdog/wdt.c
index 183876156243..3980d60bacd8 100644
--- a/drivers/watchdog/wdt.c
+++ b/drivers/watchdog/wdt.c
@@ -520,7 +520,6 @@ static int wdt_notify_sys(struct notifier_block *this, unsigned long code,
static const struct file_operations wdt_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.write = wdt_write,
.unlocked_ioctl = wdt_ioctl,
.compat_ioctl = compat_ptr_ioctl,
@@ -536,7 +535,6 @@ static struct miscdevice wdt_miscdev = {
static const struct file_operations wdt_temp_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.read = wdt_temp_read,
.open = wdt_temp_open,
.release = wdt_temp_release,
diff --git a/drivers/watchdog/wdt285.c b/drivers/watchdog/wdt285.c
index 5b7be7a62d54..78681d9f7d53 100644
--- a/drivers/watchdog/wdt285.c
+++ b/drivers/watchdog/wdt285.c
@@ -178,7 +178,6 @@ static long watchdog_ioctl(struct file *file, unsigned int cmd,
static const struct file_operations watchdog_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.write = watchdog_write,
.unlocked_ioctl = watchdog_ioctl,
.compat_ioctl = compat_ptr_ioctl,
diff --git a/drivers/watchdog/wdt977.c b/drivers/watchdog/wdt977.c
index c9b8e863f70f..4f449ac4dda4 100644
--- a/drivers/watchdog/wdt977.c
+++ b/drivers/watchdog/wdt977.c
@@ -419,7 +419,6 @@ static int wdt977_notify_sys(struct notifier_block *this, unsigned long code,
static const struct file_operations wdt977_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.write = wdt977_write,
.unlocked_ioctl = wdt977_ioctl,
.compat_ioctl = compat_ptr_ioctl,
diff --git a/drivers/watchdog/wdt_pci.c b/drivers/watchdog/wdt_pci.c
index d5e56b601351..dc5f29560e9b 100644
--- a/drivers/watchdog/wdt_pci.c
+++ b/drivers/watchdog/wdt_pci.c
@@ -563,7 +563,6 @@ static int wdtpci_notify_sys(struct notifier_block *this, unsigned long code,
static const struct file_operations wdtpci_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.write = wdtpci_write,
.unlocked_ioctl = wdtpci_ioctl,
.compat_ioctl = compat_ptr_ioctl,
@@ -579,7 +578,6 @@ static struct miscdevice wdtpci_miscdev = {
static const struct file_operations wdtpci_temp_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.read = wdtpci_temp_read,
.open = wdtpci_temp_open,
.release = wdtpci_temp_release,
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index f7d6f47971fd..62035fe16bb8 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -261,6 +261,7 @@ config XEN_SCSI_BACKEND
config XEN_PRIVCMD
tristate "Xen hypercall passthrough driver"
depends on XEN
+ imply CONFIG_XEN_PCIDEV_BACKEND
default m
help
The hypercall passthrough driver allows privileged user programs to
diff --git a/drivers/xen/acpi.c b/drivers/xen/acpi.c
index 6893c79fd2a1..9e2096524fbc 100644
--- a/drivers/xen/acpi.c
+++ b/drivers/xen/acpi.c
@@ -30,6 +30,7 @@
* IN THE SOFTWARE.
*/
+#include <linux/pci.h>
#include <xen/acpi.h>
#include <xen/interface/platform.h>
#include <asm/xen/hypercall.h>
@@ -75,3 +76,52 @@ int xen_acpi_notify_hypervisor_extended_sleep(u8 sleep_state,
return xen_acpi_notify_hypervisor_state(sleep_state, val_a,
val_b, true);
}
+
+struct acpi_prt_entry {
+ struct acpi_pci_id id;
+ u8 pin;
+ acpi_handle link;
+ u32 index;
+};
+
+int xen_acpi_get_gsi_info(struct pci_dev *dev,
+ int *gsi_out,
+ int *trigger_out,
+ int *polarity_out)
+{
+ int gsi;
+ u8 pin;
+ struct acpi_prt_entry *entry;
+ int trigger = ACPI_LEVEL_SENSITIVE;
+ int polarity = acpi_irq_model == ACPI_IRQ_MODEL_GIC ?
+ ACPI_ACTIVE_HIGH : ACPI_ACTIVE_LOW;
+
+ if (!dev || !gsi_out || !trigger_out || !polarity_out)
+ return -EINVAL;
+
+ pin = dev->pin;
+ if (!pin)
+ return -EINVAL;
+
+ entry = acpi_pci_irq_lookup(dev, pin);
+ if (entry) {
+ if (entry->link)
+ gsi = acpi_pci_link_allocate_irq(entry->link,
+ entry->index,
+ &trigger, &polarity,
+ NULL);
+ else
+ gsi = entry->index;
+ } else
+ gsi = -1;
+
+ if (gsi < 0)
+ return -EINVAL;
+
+ *gsi_out = gsi;
+ *trigger_out = trigger;
+ *polarity_out = polarity;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xen_acpi_get_gsi_info);
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index 9b7fcc7dbb38..7e4a13e632dc 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -694,7 +694,6 @@ static const struct file_operations evtchn_fops = {
.fasync = evtchn_fasync,
.open = evtchn_open,
.release = evtchn_release,
- .llseek = no_llseek,
};
static struct miscdevice evtchn_miscdev = {
diff --git a/drivers/xen/mcelog.c b/drivers/xen/mcelog.c
index e9ac3b8c4167..4f65b641c054 100644
--- a/drivers/xen/mcelog.c
+++ b/drivers/xen/mcelog.c
@@ -182,7 +182,6 @@ static const struct file_operations xen_mce_chrdev_ops = {
.read = xen_mce_chrdev_read,
.poll = xen_mce_chrdev_poll,
.unlocked_ioctl = xen_mce_chrdev_ioctl,
- .llseek = no_llseek,
};
static struct miscdevice xen_mce_chrdev_device = {
diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c
index a2facd8f7e51..416f231809cb 100644
--- a/drivers/xen/pci.c
+++ b/drivers/xen/pci.c
@@ -173,6 +173,19 @@ static int xen_remove_device(struct device *dev)
return r;
}
+int xen_reset_device(const struct pci_dev *dev)
+{
+ struct pci_device_reset device = {
+ .dev.seg = pci_domain_nr(dev->bus),
+ .dev.bus = dev->bus->number,
+ .dev.devfn = dev->devfn,
+ .flags = PCI_DEVICE_RESET_FLR,
+ };
+
+ return HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_reset, &device);
+}
+EXPORT_SYMBOL_GPL(xen_reset_device);
+
static int xen_pci_notifier(struct notifier_block *nb,
unsigned long action, void *data)
{
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index 54e4f285c0f4..3273cb8c2a66 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -46,6 +46,9 @@
#include <xen/page.h>
#include <xen/xen-ops.h>
#include <xen/balloon.h>
+#ifdef CONFIG_XEN_ACPI
+#include <xen/acpi.h>
+#endif
#include "privcmd.h"
@@ -844,6 +847,31 @@ out:
return rc;
}
+static long privcmd_ioctl_pcidev_get_gsi(struct file *file, void __user *udata)
+{
+#if defined(CONFIG_XEN_ACPI)
+ int rc = -EINVAL;
+ struct privcmd_pcidev_get_gsi kdata;
+
+ if (copy_from_user(&kdata, udata, sizeof(kdata)))
+ return -EFAULT;
+
+ if (IS_REACHABLE(CONFIG_XEN_PCIDEV_BACKEND))
+ rc = pcistub_get_gsi_from_sbdf(kdata.sbdf);
+
+ if (rc < 0)
+ return rc;
+
+ kdata.gsi = rc;
+ if (copy_to_user(udata, &kdata, sizeof(kdata)))
+ return -EFAULT;
+
+ return 0;
+#else
+ return -EINVAL;
+#endif
+}
+
#ifdef CONFIG_XEN_PRIVCMD_EVENTFD
/* Irqfd support */
static struct workqueue_struct *irqfd_cleanup_wq;
@@ -1543,6 +1571,10 @@ static long privcmd_ioctl(struct file *file,
ret = privcmd_ioctl_ioeventfd(file, udata);
break;
+ case IOCTL_PRIVCMD_PCIDEV_GET_GSI:
+ ret = privcmd_ioctl_pcidev_get_gsi(file, udata);
+ break;
+
default:
break;
}
diff --git a/drivers/xen/xen-pciback/conf_space_capability.c b/drivers/xen/xen-pciback/conf_space_capability.c
index 1948a9700c8f..cf568e899ee2 100644
--- a/drivers/xen/xen-pciback/conf_space_capability.c
+++ b/drivers/xen/xen-pciback/conf_space_capability.c
@@ -122,7 +122,7 @@ static int pm_ctrl_write(struct pci_dev *dev, int offset, u16 new_value,
if (err)
goto out;
- new_state = (pci_power_t)(new_value & PCI_PM_CTRL_STATE_MASK);
+ new_state = (__force pci_power_t)(new_value & PCI_PM_CTRL_STATE_MASK);
new_value &= PM_OK_BITS;
if ((old_value & PM_OK_BITS) != new_value) {
diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c
index 4faebbb84999..2f3da5ac62cd 100644
--- a/drivers/xen/xen-pciback/pci_stub.c
+++ b/drivers/xen/xen-pciback/pci_stub.c
@@ -21,6 +21,9 @@
#include <xen/events.h>
#include <xen/pci.h>
#include <xen/xen.h>
+#ifdef CONFIG_XEN_ACPI
+#include <xen/acpi.h>
+#endif
#include <asm/xen/hypervisor.h>
#include <xen/interface/physdev.h>
#include "pciback.h"
@@ -53,6 +56,9 @@ struct pcistub_device {
struct pci_dev *dev;
struct xen_pcibk_device *pdev;/* non-NULL if struct pci_dev is in use */
+#ifdef CONFIG_XEN_ACPI
+ int gsi;
+#endif
};
/* Access to pcistub_devices & seized_devices lists and the initialize_devices
@@ -85,10 +91,23 @@ static struct pcistub_device *pcistub_device_alloc(struct pci_dev *dev)
kref_init(&psdev->kref);
spin_lock_init(&psdev->lock);
+#ifdef CONFIG_XEN_ACPI
+ psdev->gsi = -1;
+#endif
return psdev;
}
+static int pcistub_reset_device_state(struct pci_dev *dev)
+{
+ __pci_reset_function_locked(dev);
+
+ if (!xen_pv_domain())
+ return xen_reset_device(dev);
+ else
+ return 0;
+}
+
/* Don't call this directly as it's called by pcistub_device_put */
static void pcistub_device_release(struct kref *kref)
{
@@ -107,7 +126,7 @@ static void pcistub_device_release(struct kref *kref)
/* Call the reset function which does not take lock as this
* is called from "unbind" which takes a device_lock mutex.
*/
- __pci_reset_function_locked(dev);
+ pcistub_reset_device_state(dev);
if (dev_data &&
pci_load_and_free_saved_state(dev, &dev_data->pci_saved_state))
dev_info(&dev->dev, "Could not reload PCI state\n");
@@ -207,6 +226,25 @@ static struct pci_dev *pcistub_device_get_pci_dev(struct xen_pcibk_device *pdev,
return pci_dev;
}
+#ifdef CONFIG_XEN_ACPI
+int pcistub_get_gsi_from_sbdf(unsigned int sbdf)
+{
+ struct pcistub_device *psdev;
+ int domain = (sbdf >> 16) & 0xffff;
+ int bus = PCI_BUS_NUM(sbdf);
+ int slot = PCI_SLOT(sbdf);
+ int func = PCI_FUNC(sbdf);
+
+ psdev = pcistub_device_find(domain, bus, slot, func);
+
+ if (!psdev)
+ return -ENODEV;
+
+ return psdev->gsi;
+}
+EXPORT_SYMBOL_GPL(pcistub_get_gsi_from_sbdf);
+#endif
+
struct pci_dev *pcistub_get_pci_dev_by_slot(struct xen_pcibk_device *pdev,
int domain, int bus,
int slot, int func)
@@ -284,7 +322,7 @@ void pcistub_put_pci_dev(struct pci_dev *dev)
* (so it's ready for the next domain)
*/
device_lock_assert(&dev->dev);
- __pci_reset_function_locked(dev);
+ pcistub_reset_device_state(dev);
dev_data = pci_get_drvdata(dev);
ret = pci_load_saved_state(dev, dev_data->pci_saved_state);
@@ -354,11 +392,20 @@ static int pcistub_match(struct pci_dev *dev)
return found;
}
-static int pcistub_init_device(struct pci_dev *dev)
+static int pcistub_init_device(struct pcistub_device *psdev)
{
struct xen_pcibk_dev_data *dev_data;
+ struct pci_dev *dev;
+#ifdef CONFIG_XEN_ACPI
+ int gsi, trigger, polarity;
+#endif
int err = 0;
+ if (!psdev)
+ return -EINVAL;
+
+ dev = psdev->dev;
+
dev_dbg(&dev->dev, "initializing...\n");
/* The PCI backend is not intended to be a module (or to work with
@@ -420,9 +467,26 @@ static int pcistub_init_device(struct pci_dev *dev)
dev_err(&dev->dev, "Could not store PCI conf saved state!\n");
else {
dev_dbg(&dev->dev, "resetting (FLR, D3, etc) the device\n");
- __pci_reset_function_locked(dev);
+ err = pcistub_reset_device_state(dev);
+ if (err)
+ goto config_release;
pci_restore_state(dev);
}
+
+#ifdef CONFIG_XEN_ACPI
+ if (xen_initial_domain() && xen_pvh_domain()) {
+ err = xen_acpi_get_gsi_info(dev, &gsi, &trigger, &polarity);
+ if (err) {
+ dev_err(&dev->dev, "Fail to get gsi info!\n");
+ goto config_release;
+ }
+ err = xen_pvh_setup_gsi(gsi, trigger, polarity);
+ if (err)
+ goto config_release;
+ psdev->gsi = gsi;
+ }
+#endif
+
/* Now disable the device (this also ensures some private device
* data is setup before we export)
*/
@@ -462,7 +526,7 @@ static int __init pcistub_init_devices_late(void)
spin_unlock_irqrestore(&pcistub_devices_lock, flags);
- err = pcistub_init_device(psdev->dev);
+ err = pcistub_init_device(psdev);
if (err) {
dev_err(&psdev->dev->dev,
"error %d initializing device\n", err);
@@ -532,7 +596,7 @@ static int pcistub_seize(struct pci_dev *dev,
spin_unlock_irqrestore(&pcistub_devices_lock, flags);
/* don't want irqs disabled when calling pcistub_init_device */
- err = pcistub_init_device(psdev->dev);
+ err = pcistub_init_device(psdev);
spin_lock_irqsave(&pcistub_devices_lock, flags);
@@ -757,7 +821,7 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
}
clear_bit(_PCIB_op_pending, (unsigned long *)&pdev->flags);
- res = (pci_ers_result_t)aer_op->err;
+ res = (__force pci_ers_result_t)aer_op->err;
return res;
}
diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c
index 6f56640092a9..46f8916597e5 100644
--- a/drivers/xen/xenbus/xenbus_dev_frontend.c
+++ b/drivers/xen/xenbus/xenbus_dev_frontend.c
@@ -700,7 +700,6 @@ const struct file_operations xen_xenbus_fops = {
.open = xenbus_file_open,
.release = xenbus_file_release,
.poll = xenbus_file_poll,
- .llseek = no_llseek,
};
EXPORT_SYMBOL_GPL(xen_xenbus_fops);
diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c
index e11989a57ca0..47455a85c909 100644
--- a/fs/bcachefs/backpointers.c
+++ b/fs/bcachefs/backpointers.c
@@ -501,7 +501,7 @@ found:
prt_printf(&buf, "\n %s ", bch2_btree_id_str(o_btree));
bch2_bkey_val_to_text(&buf, c, extent2);
- struct nonce nonce = extent_nonce(extent.k->version, p.crc);
+ struct nonce nonce = extent_nonce(extent.k->bversion, p.crc);
struct bch_csum csum = bch2_checksum(c, p.crc.csum_type, nonce, data_buf, bytes);
if (fsck_err_on(bch2_crc_cmp(csum, p.crc.csum),
trans, dup_backpointer_to_bad_csum_extent,
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index c711d4c27a03..f4151ee51b03 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -594,6 +594,7 @@ struct bch_dev {
#define BCH_FS_FLAGS() \
x(new_fs) \
x(started) \
+ x(clean_recovery) \
x(btree_running) \
x(accounting_replay_done) \
x(may_go_rw) \
@@ -776,7 +777,7 @@ struct bch_fs {
unsigned nsec_per_time_unit;
u64 features;
u64 compat;
- unsigned long errors_silent[BITS_TO_LONGS(BCH_SB_ERR_MAX)];
+ unsigned long errors_silent[BITS_TO_LONGS(BCH_FSCK_ERR_MAX)];
u64 btrees_lost_data;
} sb;
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index 8c4addddd07e..203ee627cab5 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -217,7 +217,7 @@ struct bkey {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
__u8 pad[1];
- struct bversion version;
+ struct bversion bversion;
__u32 size; /* extent size, in sectors */
struct bpos p;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
@@ -328,8 +328,8 @@ enum bch_bkey_fields {
bkey_format_field(OFFSET, p.offset), \
bkey_format_field(SNAPSHOT, p.snapshot), \
bkey_format_field(SIZE, size), \
- bkey_format_field(VERSION_HI, version.hi), \
- bkey_format_field(VERSION_LO, version.lo), \
+ bkey_format_field(VERSION_HI, bversion.hi), \
+ bkey_format_field(VERSION_LO, bversion.lo), \
}, \
})
diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h
index e34cb2bf329c..41df24a53d97 100644
--- a/fs/bcachefs/bkey.h
+++ b/fs/bcachefs/bkey.h
@@ -214,9 +214,9 @@ static __always_inline int bversion_cmp(struct bversion l, struct bversion r)
#define ZERO_VERSION ((struct bversion) { .hi = 0, .lo = 0 })
#define MAX_VERSION ((struct bversion) { .hi = ~0, .lo = ~0ULL })
-static __always_inline int bversion_zero(struct bversion v)
+static __always_inline bool bversion_zero(struct bversion v)
{
- return !bversion_cmp(v, ZERO_VERSION);
+ return bversion_cmp(v, ZERO_VERSION) == 0;
}
#ifdef CONFIG_BCACHEFS_DEBUG
@@ -554,8 +554,8 @@ static inline void bch2_bkey_pack_test(void) {}
x(BKEY_FIELD_OFFSET, p.offset) \
x(BKEY_FIELD_SNAPSHOT, p.snapshot) \
x(BKEY_FIELD_SIZE, size) \
- x(BKEY_FIELD_VERSION_HI, version.hi) \
- x(BKEY_FIELD_VERSION_LO, version.lo)
+ x(BKEY_FIELD_VERSION_HI, bversion.hi) \
+ x(BKEY_FIELD_VERSION_LO, bversion.lo)
struct bkey_format_state {
u64 field_min[BKEY_NR_FIELDS];
diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c
index 88d8958281e8..e7ac227ba7e8 100644
--- a/fs/bcachefs/bkey_methods.c
+++ b/fs/bcachefs/bkey_methods.c
@@ -289,7 +289,7 @@ void bch2_bkey_to_text(struct printbuf *out, const struct bkey *k)
bch2_bpos_to_text(out, k->p);
- prt_printf(out, " len %u ver %llu", k->size, k->version.lo);
+ prt_printf(out, " len %u ver %llu", k->size, k->bversion.lo);
} else {
prt_printf(out, "(null)");
}
diff --git a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h
index 3df3dd2723a1..018fb72e32d3 100644
--- a/fs/bcachefs/bkey_methods.h
+++ b/fs/bcachefs/bkey_methods.h
@@ -70,7 +70,7 @@ bool bch2_bkey_normalize(struct bch_fs *, struct bkey_s);
static inline bool bch2_bkey_maybe_mergable(const struct bkey *l, const struct bkey *r)
{
return l->type == r->type &&
- !bversion_cmp(l->version, r->version) &&
+ !bversion_cmp(l->bversion, r->bversion) &&
bpos_eq(l->p, bkey_start_pos(r));
}
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index b5e0692f03c6..660d2fa02da2 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -513,6 +513,8 @@ int bch2_check_topology(struct bch_fs *c)
struct bpos pulled_from_scan = POS_MIN;
int ret = 0;
+ bch2_trans_srcu_unlock(trans);
+
for (unsigned i = 0; i < btree_id_nr_alive(c) && !ret; i++) {
struct btree_root *r = bch2_btree_id_root(c, i);
bool reconstructed_root = false;
@@ -599,15 +601,15 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
if (initial) {
BUG_ON(bch2_journal_seq_verify &&
- k.k->version.lo > atomic64_read(&c->journal.seq));
+ k.k->bversion.lo > atomic64_read(&c->journal.seq));
if (fsck_err_on(btree_id != BTREE_ID_accounting &&
- k.k->version.lo > atomic64_read(&c->key_version),
+ k.k->bversion.lo > atomic64_read(&c->key_version),
trans, bkey_version_in_future,
"key version number higher than recorded %llu\n %s",
atomic64_read(&c->key_version),
(bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
- atomic64_set(&c->key_version, k.k->version.lo);
+ atomic64_set(&c->key_version, k.k->bversion.lo);
}
if (mustfix_fsck_err_on(level && !bch2_dev_btree_bitmap_marked(c, k),
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index cb48a9477514..1c1448b52207 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -1195,6 +1195,10 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
set_btree_bset(b, b->set, &b->data->keys);
b->nr = bch2_key_sort_fix_overlapping(c, &sorted->keys, iter);
+ memset((uint8_t *)(sorted + 1) + b->nr.live_u64s * sizeof(u64), 0,
+ btree_buf_bytes(b) -
+ sizeof(struct btree_node) -
+ b->nr.live_u64s * sizeof(u64));
u64s = le16_to_cpu(sorted->keys.u64s);
*sorted = *b->data;
@@ -1219,7 +1223,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
ret = bch2_bkey_val_validate(c, u.s_c, READ);
if (ret == -BCH_ERR_fsck_delete_bkey ||
(bch2_inject_invalid_keys &&
- !bversion_cmp(u.k->version, MAX_VERSION))) {
+ !bversion_cmp(u.k->bversion, MAX_VERSION))) {
btree_keys_account_key_drop(&b->nr, 0, k);
i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s);
diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c
index b28c649c6838..1e694fedc5da 100644
--- a/fs/bcachefs/btree_node_scan.c
+++ b/fs/bcachefs/btree_node_scan.c
@@ -275,7 +275,7 @@ static int read_btree_nodes(struct find_btree_nodes *f)
w->ca = ca;
t = kthread_run(read_btree_nodes_worker, w, "read_btree_nodes/%s", ca->name);
- ret = IS_ERR_OR_NULL(t);
+ ret = PTR_ERR_OR_ZERO(t);
if (ret) {
percpu_ref_put(&ca->io_ref);
closure_put(&cl);
diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c
index 91884da4e30a..1a74a1a252ee 100644
--- a/fs/bcachefs/btree_trans_commit.c
+++ b/fs/bcachefs/btree_trans_commit.c
@@ -684,10 +684,10 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
!(flags & BCH_TRANS_COMMIT_no_journal_res)) {
if (bch2_journal_seq_verify)
trans_for_each_update(trans, i)
- i->k->k.version.lo = trans->journal_res.seq;
+ i->k->k.bversion.lo = trans->journal_res.seq;
else if (bch2_inject_invalid_keys)
trans_for_each_update(trans, i)
- i->k->k.version = MAX_VERSION;
+ i->k->k.bversion = MAX_VERSION;
}
h = trans->hooks;
@@ -700,27 +700,31 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
struct jset_entry *entry = trans->journal_entries;
- if (likely(!(flags & BCH_TRANS_COMMIT_skip_accounting_apply))) {
- percpu_down_read(&c->mark_lock);
+ percpu_down_read(&c->mark_lock);
+
+ for (entry = trans->journal_entries;
+ entry != (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s);
+ entry = vstruct_next(entry))
+ if (entry->type == BCH_JSET_ENTRY_write_buffer_keys &&
+ entry->start->k.type == KEY_TYPE_accounting) {
+ BUG_ON(!trans->journal_res.ref);
+
+ struct bkey_i_accounting *a = bkey_i_to_accounting(entry->start);
- for (entry = trans->journal_entries;
- entry != (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s);
- entry = vstruct_next(entry))
- if (jset_entry_is_key(entry) && entry->start->k.type == KEY_TYPE_accounting) {
- struct bkey_i_accounting *a = bkey_i_to_accounting(entry->start);
+ a->k.bversion = journal_pos_to_bversion(&trans->journal_res,
+ (u64 *) entry - (u64 *) trans->journal_entries);
+ BUG_ON(bversion_zero(a->k.bversion));
- a->k.version = journal_pos_to_bversion(&trans->journal_res,
- (u64 *) entry - (u64 *) trans->journal_entries);
- BUG_ON(bversion_zero(a->k.version));
- ret = bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), false, false);
+ if (likely(!(flags & BCH_TRANS_COMMIT_skip_accounting_apply))) {
+ ret = bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), BCH_ACCOUNTING_normal);
if (ret)
goto revert_fs_usage;
}
- percpu_up_read(&c->mark_lock);
+ }
+ percpu_up_read(&c->mark_lock);
- /* XXX: we only want to run this if deltas are nonzero */
- bch2_trans_account_disk_usage_change(trans);
- }
+ /* XXX: we only want to run this if deltas are nonzero */
+ bch2_trans_account_disk_usage_change(trans);
trans_for_each_update(trans, i)
if (btree_node_type_has_atomic_triggers(i->bkey_type)) {
@@ -735,6 +739,40 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
goto fatal_err;
}
+ trans_for_each_update(trans, i) {
+ enum bch_validate_flags invalid_flags = 0;
+
+ if (!(flags & BCH_TRANS_COMMIT_no_journal_res))
+ invalid_flags |= BCH_VALIDATE_write|BCH_VALIDATE_commit;
+
+ ret = bch2_bkey_validate(c, bkey_i_to_s_c(i->k),
+ i->bkey_type, invalid_flags);
+ if (unlikely(ret)){
+ bch2_trans_inconsistent(trans, "invalid bkey on insert from %s -> %ps\n",
+ trans->fn, (void *) i->ip_allocated);
+ goto fatal_err;
+ }
+ btree_insert_entry_checks(trans, i);
+ }
+
+ for (struct jset_entry *i = trans->journal_entries;
+ i != (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s);
+ i = vstruct_next(i)) {
+ enum bch_validate_flags invalid_flags = 0;
+
+ if (!(flags & BCH_TRANS_COMMIT_no_journal_res))
+ invalid_flags |= BCH_VALIDATE_write|BCH_VALIDATE_commit;
+
+ ret = bch2_journal_entry_validate(c, NULL, i,
+ bcachefs_metadata_version_current,
+ CPU_BIG_ENDIAN, invalid_flags);
+ if (unlikely(ret)) {
+ bch2_trans_inconsistent(trans, "invalid journal entry on insert from %s\n",
+ trans->fn);
+ goto fatal_err;
+ }
+ }
+
if (likely(!(flags & BCH_TRANS_COMMIT_no_journal_res))) {
struct journal *j = &c->journal;
struct jset_entry *entry;
@@ -798,7 +836,7 @@ revert_fs_usage:
struct bkey_s_accounting a = bkey_i_to_s_accounting(entry2->start);
bch2_accounting_neg(a);
- bch2_accounting_mem_mod_locked(trans, a.c, false, false);
+ bch2_accounting_mem_mod_locked(trans, a.c, BCH_ACCOUNTING_normal);
bch2_accounting_neg(a);
}
percpu_up_read(&c->mark_lock);
@@ -1019,40 +1057,6 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
if (ret)
goto out_reset;
- trans_for_each_update(trans, i) {
- enum bch_validate_flags invalid_flags = 0;
-
- if (!(flags & BCH_TRANS_COMMIT_no_journal_res))
- invalid_flags |= BCH_VALIDATE_write|BCH_VALIDATE_commit;
-
- ret = bch2_bkey_validate(c, bkey_i_to_s_c(i->k),
- i->bkey_type, invalid_flags);
- if (unlikely(ret)){
- bch2_trans_inconsistent(trans, "invalid bkey on insert from %s -> %ps\n",
- trans->fn, (void *) i->ip_allocated);
- return ret;
- }
- btree_insert_entry_checks(trans, i);
- }
-
- for (struct jset_entry *i = trans->journal_entries;
- i != (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s);
- i = vstruct_next(i)) {
- enum bch_validate_flags invalid_flags = 0;
-
- if (!(flags & BCH_TRANS_COMMIT_no_journal_res))
- invalid_flags |= BCH_VALIDATE_write|BCH_VALIDATE_commit;
-
- ret = bch2_journal_entry_validate(c, NULL, i,
- bcachefs_metadata_version_current,
- CPU_BIG_ENDIAN, invalid_flags);
- if (unlikely(ret)) {
- bch2_trans_inconsistent(trans, "invalid journal entry on insert from %s\n",
- trans->fn);
- return ret;
- }
- }
-
if (unlikely(!test_bit(BCH_FS_may_go_rw, &c->flags))) {
ret = do_bch2_trans_commit_to_journal_replay(trans);
goto out_reset;
diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h
index 60393e98084d..6a454f2fa005 100644
--- a/fs/bcachefs/btree_update.h
+++ b/fs/bcachefs/btree_update.h
@@ -220,7 +220,8 @@ static inline struct bkey_i *__bch2_bkey_make_mut_noupdate(struct btree_trans *t
if (type && k.k->type != type)
return ERR_PTR(-ENOENT);
- mut = bch2_trans_kmalloc_nomemzero(trans, bytes);
+ /* extra padding for varint_decode_fast... */
+ mut = bch2_trans_kmalloc_nomemzero(trans, bytes + 8);
if (!IS_ERR(mut)) {
bkey_reassemble(mut, k);
diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c
index ef1f74866e23..cbfd88f98472 100644
--- a/fs/bcachefs/chardev.c
+++ b/fs/bcachefs/chardev.c
@@ -471,7 +471,6 @@ static ssize_t bch2_data_job_read(struct file *file, char __user *buf,
static const struct file_operations bcachefs_data_ops = {
.release = bch2_data_job_release,
.read = bch2_data_job_read,
- .llseek = no_llseek,
};
static long bch2_ioctl_data(struct bch_fs *c,
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index 757b9884ef55..462b1a2fe1ad 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -639,7 +639,7 @@ int bch2_data_update_init(struct btree_trans *trans,
bch2_write_op_init(&m->op, c, io_opts);
m->op.pos = bkey_start_pos(k.k);
- m->op.version = k.k->version;
+ m->op.version = k.k->bversion;
m->op.target = data_opts.target;
m->op.write_point = wp;
m->op.nr_replicas = 0;
diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c
index e972e2bca546..9f3133e3e7e5 100644
--- a/fs/bcachefs/disk_accounting.c
+++ b/fs/bcachefs/disk_accounting.c
@@ -134,6 +134,10 @@ int bch2_accounting_validate(struct bch_fs *c, struct bkey_s_c k,
void *end = &acc_k + 1;
int ret = 0;
+ bkey_fsck_err_on(bversion_zero(k.k->bversion),
+ c, accounting_key_version_0,
+ "accounting key with version=0");
+
switch (acc_k.type) {
case BCH_DISK_ACCOUNTING_nr_inodes:
end = field_end(acc_k, nr_inodes);
@@ -291,7 +295,7 @@ static int __bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accoun
struct accounting_mem_entry n = {
.pos = a.k->p,
- .version = a.k->version,
+ .bversion = a.k->bversion,
.nr_counters = bch2_accounting_counters(a.k),
.v[0] = __alloc_percpu_gfp(n.nr_counters * sizeof(u64),
sizeof(u64), GFP_KERNEL),
@@ -319,11 +323,13 @@ err:
return -BCH_ERR_ENOMEM_disk_accounting;
}
-int bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accounting a, bool gc)
+int bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accounting a,
+ enum bch_accounting_mode mode)
{
struct bch_replicas_padded r;
- if (accounting_to_replicas(&r.e, a.k->p) &&
+ if (mode != BCH_ACCOUNTING_read &&
+ accounting_to_replicas(&r.e, a.k->p) &&
!bch2_replicas_marked_locked(c, &r.e))
return -BCH_ERR_btree_insert_need_mark_replicas;
@@ -566,7 +572,9 @@ int bch2_gc_accounting_done(struct bch_fs *c)
struct { __BKEY_PADDED(k, BCH_ACCOUNTING_MAX_COUNTERS); } k_i;
accounting_key_init(&k_i.k, &acc_k, src_v, nr);
- bch2_accounting_mem_mod_locked(trans, bkey_i_to_s_c_accounting(&k_i.k), false, false);
+ bch2_accounting_mem_mod_locked(trans,
+ bkey_i_to_s_c_accounting(&k_i.k),
+ BCH_ACCOUNTING_normal);
preempt_disable();
struct bch_fs_usage_base *dst = this_cpu_ptr(c->usage);
@@ -589,30 +597,14 @@ fsck_err:
static int accounting_read_key(struct btree_trans *trans, struct bkey_s_c k)
{
struct bch_fs *c = trans->c;
- struct printbuf buf = PRINTBUF;
if (k.k->type != KEY_TYPE_accounting)
return 0;
percpu_down_read(&c->mark_lock);
- int ret = bch2_accounting_mem_mod_locked(trans, bkey_s_c_to_accounting(k), false, true);
+ int ret = bch2_accounting_mem_mod_locked(trans, bkey_s_c_to_accounting(k),
+ BCH_ACCOUNTING_read);
percpu_up_read(&c->mark_lock);
-
- if (bch2_accounting_key_is_zero(bkey_s_c_to_accounting(k)) &&
- ret == -BCH_ERR_btree_insert_need_mark_replicas)
- ret = 0;
-
- struct disk_accounting_pos acc;
- bpos_to_disk_accounting_pos(&acc, k.k->p);
-
- if (fsck_err_on(ret == -BCH_ERR_btree_insert_need_mark_replicas,
- trans, accounting_replicas_not_marked,
- "accounting not marked in superblock replicas\n %s",
- (bch2_accounting_key_to_text(&buf, &acc),
- buf.buf)))
- ret = bch2_accounting_update_sb_one(c, k.k->p);
-fsck_err:
- printbuf_exit(&buf);
return ret;
}
@@ -624,6 +616,7 @@ int bch2_accounting_read(struct bch_fs *c)
{
struct bch_accounting_mem *acc = &c->accounting;
struct btree_trans *trans = bch2_trans_get(c);
+ struct printbuf buf = PRINTBUF;
int ret = for_each_btree_key(trans, iter,
BTREE_ID_accounting, POS_MIN,
@@ -647,7 +640,7 @@ int bch2_accounting_read(struct bch_fs *c)
accounting_pos_cmp, &k.k->p);
bool applied = idx < acc->k.nr &&
- bversion_cmp(acc->k.data[idx].version, k.k->version) >= 0;
+ bversion_cmp(acc->k.data[idx].bversion, k.k->bversion) >= 0;
if (applied)
continue;
@@ -655,7 +648,7 @@ int bch2_accounting_read(struct bch_fs *c)
if (i + 1 < &darray_top(*keys) &&
i[1].k->k.type == KEY_TYPE_accounting &&
!journal_key_cmp(i, i + 1)) {
- BUG_ON(bversion_cmp(i[0].k->k.version, i[1].k->k.version) >= 0);
+ WARN_ON(bversion_cmp(i[0].k->k.bversion, i[1].k->k.bversion) >= 0);
i[1].journal_seq = i[0].journal_seq;
@@ -674,6 +667,45 @@ int bch2_accounting_read(struct bch_fs *c)
keys->gap = keys->nr = dst - keys->data;
percpu_down_read(&c->mark_lock);
+ for (unsigned i = 0; i < acc->k.nr; i++) {
+ u64 v[BCH_ACCOUNTING_MAX_COUNTERS];
+ bch2_accounting_mem_read_counters(acc, i, v, ARRAY_SIZE(v), false);
+
+ if (bch2_is_zero(v, sizeof(v[0]) * acc->k.data[i].nr_counters))
+ continue;
+
+ struct bch_replicas_padded r;
+ if (!accounting_to_replicas(&r.e, acc->k.data[i].pos))
+ continue;
+
+ /*
+ * If the replicas entry is invalid it'll get cleaned up by
+ * check_allocations:
+ */
+ if (bch2_replicas_entry_validate(&r.e, c, &buf))
+ continue;
+
+ struct disk_accounting_pos k;
+ bpos_to_disk_accounting_pos(&k, acc->k.data[i].pos);
+
+ if (fsck_err_on(!bch2_replicas_marked_locked(c, &r.e),
+ trans, accounting_replicas_not_marked,
+ "accounting not marked in superblock replicas\n %s",
+ (printbuf_reset(&buf),
+ bch2_accounting_key_to_text(&buf, &k),
+ buf.buf))) {
+ /*
+ * We're not RW yet and still single threaded, dropping
+ * and retaking lock is ok:
+ */
+ percpu_up_read(&c->mark_lock);
+ ret = bch2_mark_replicas(c, &r.e);
+ if (ret)
+ goto fsck_err;
+ percpu_down_read(&c->mark_lock);
+ }
+ }
+
preempt_disable();
struct bch_fs_usage_base *usage = this_cpu_ptr(c->usage);
@@ -709,8 +741,10 @@ int bch2_accounting_read(struct bch_fs *c)
}
}
preempt_enable();
+fsck_err:
percpu_up_read(&c->mark_lock);
err:
+ printbuf_exit(&buf);
bch2_trans_put(trans);
bch_err_fn(c, ret);
return ret;
diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h
index f29fd0dd9581..4ea6c8a092bc 100644
--- a/fs/bcachefs/disk_accounting.h
+++ b/fs/bcachefs/disk_accounting.h
@@ -36,8 +36,8 @@ static inline void bch2_accounting_accumulate(struct bkey_i_accounting *dst,
for (unsigned i = 0; i < bch2_accounting_counters(&dst->k); i++)
dst->v.d[i] += src.v->d[i];
- if (bversion_cmp(dst->k.version, src.k->version) < 0)
- dst->k.version = src.k->version;
+ if (bversion_cmp(dst->k.bversion, src.k->bversion) < 0)
+ dst->k.bversion = src.k->bversion;
}
static inline void fs_usage_data_type_to_base(struct bch_fs_usage_base *fs_usage,
@@ -103,23 +103,35 @@ static inline int accounting_pos_cmp(const void *_l, const void *_r)
return bpos_cmp(*l, *r);
}
-int bch2_accounting_mem_insert(struct bch_fs *, struct bkey_s_c_accounting, bool);
+enum bch_accounting_mode {
+ BCH_ACCOUNTING_normal,
+ BCH_ACCOUNTING_gc,
+ BCH_ACCOUNTING_read,
+};
+
+int bch2_accounting_mem_insert(struct bch_fs *, struct bkey_s_c_accounting, enum bch_accounting_mode);
void bch2_accounting_mem_gc(struct bch_fs *);
/*
* Update in memory counters so they match the btree update we're doing; called
* from transaction commit path
*/
-static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, struct bkey_s_c_accounting a, bool gc, bool read)
+static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans,
+ struct bkey_s_c_accounting a,
+ enum bch_accounting_mode mode)
{
struct bch_fs *c = trans->c;
+ struct bch_accounting_mem *acc = &c->accounting;
struct disk_accounting_pos acc_k;
bpos_to_disk_accounting_pos(&acc_k, a.k->p);
+ bool gc = mode == BCH_ACCOUNTING_gc;
+
+ EBUG_ON(gc && !acc->gc_running);
if (acc_k.type == BCH_DISK_ACCOUNTING_inum)
return 0;
- if (!gc && !read) {
+ if (mode == BCH_ACCOUNTING_normal) {
switch (acc_k.type) {
case BCH_DISK_ACCOUNTING_persistent_reserved:
trans->fs_usage_delta.reserved += acc_k.persistent_reserved.nr_replicas * a.v->d[0];
@@ -140,14 +152,11 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, stru
}
}
- struct bch_accounting_mem *acc = &c->accounting;
unsigned idx;
- EBUG_ON(gc && !acc->gc_running);
-
while ((idx = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
accounting_pos_cmp, &a.k->p)) >= acc->k.nr) {
- int ret = bch2_accounting_mem_insert(c, a, gc);
+ int ret = bch2_accounting_mem_insert(c, a, mode);
if (ret)
return ret;
}
@@ -164,7 +173,7 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, stru
static inline int bch2_accounting_mem_add(struct btree_trans *trans, struct bkey_s_c_accounting a, bool gc)
{
percpu_down_read(&trans->c->mark_lock);
- int ret = bch2_accounting_mem_mod_locked(trans, a, gc, false);
+ int ret = bch2_accounting_mem_mod_locked(trans, a, gc ? BCH_ACCOUNTING_gc : BCH_ACCOUNTING_normal);
percpu_up_read(&trans->c->mark_lock);
return ret;
}
diff --git a/fs/bcachefs/disk_accounting_types.h b/fs/bcachefs/disk_accounting_types.h
index 1687a45177a7..b1982131b206 100644
--- a/fs/bcachefs/disk_accounting_types.h
+++ b/fs/bcachefs/disk_accounting_types.h
@@ -6,7 +6,7 @@
struct accounting_mem_entry {
struct bpos pos;
- struct bversion version;
+ struct bversion bversion;
unsigned nr_counters;
u64 __percpu *v[2];
};
diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c
index 95afa7bf2020..3a16b535b6c3 100644
--- a/fs/bcachefs/error.c
+++ b/fs/bcachefs/error.c
@@ -239,7 +239,19 @@ int __bch2_fsck_err(struct bch_fs *c,
if (!c)
c = trans->c;
- WARN_ON(!trans && bch2_current_has_btree_trans(c));
+ /*
+ * Ugly: if there's a transaction in the current task it has to be
+ * passed in to unlock if we prompt for user input.
+ *
+ * But, plumbing a transaction and transaction restarts into
+ * bkey_validate() is problematic.
+ *
+ * So:
+ * - make all bkey errors AUTOFIX, they're simple anyways (we just
+ * delete the key)
+ * - and we don't need to warn if we're not prompting
+ */
+ WARN_ON(!(flags & FSCK_AUTOFIX) && !trans && bch2_current_has_btree_trans(c));
if ((flags & FSCK_CAN_FIX) &&
test_bit(err, c->sb.errors_silent))
diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h
index 2f1b86978f36..21ee7211b03e 100644
--- a/fs/bcachefs/error.h
+++ b/fs/bcachefs/error.h
@@ -184,7 +184,7 @@ do { \
ret = -BCH_ERR_fsck_delete_bkey; \
goto fsck_err; \
} \
- int _ret = __bch2_bkey_fsck_err(c, k, FSCK_CAN_FIX, \
+ int _ret = __bch2_bkey_fsck_err(c, k, FSCK_CAN_FIX|FSCK_AUTOFIX,\
BCH_FSCK_ERR_##_err_type, \
_err_msg, ##__VA_ARGS__); \
if (_ret != -BCH_ERR_fsck_fix && \
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index 9b3470a97546..0d8b782b63fb 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -21,6 +21,49 @@
#include <linux/bsearch.h>
#include <linux/dcache.h> /* struct qstr */
+static bool inode_points_to_dirent(struct bch_inode_unpacked *inode,
+ struct bkey_s_c_dirent d)
+{
+ return inode->bi_dir == d.k->p.inode &&
+ inode->bi_dir_offset == d.k->p.offset;
+}
+
+static bool dirent_points_to_inode_nowarn(struct bkey_s_c_dirent d,
+ struct bch_inode_unpacked *inode)
+{
+ if (d.v->d_type == DT_SUBVOL
+ ? le32_to_cpu(d.v->d_child_subvol) == inode->bi_subvol
+ : le64_to_cpu(d.v->d_inum) == inode->bi_inum)
+ return 0;
+ return -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
+}
+
+static void dirent_inode_mismatch_msg(struct printbuf *out,
+ struct bch_fs *c,
+ struct bkey_s_c_dirent dirent,
+ struct bch_inode_unpacked *inode)
+{
+ prt_str(out, "inode points to dirent that does not point back:");
+ prt_newline(out);
+ bch2_bkey_val_to_text(out, c, dirent.s_c);
+ prt_newline(out);
+ bch2_inode_unpacked_to_text(out, inode);
+}
+
+static int dirent_points_to_inode(struct bch_fs *c,
+ struct bkey_s_c_dirent dirent,
+ struct bch_inode_unpacked *inode)
+{
+ int ret = dirent_points_to_inode_nowarn(dirent, inode);
+ if (ret) {
+ struct printbuf buf = PRINTBUF;
+ dirent_inode_mismatch_msg(&buf, c, dirent, inode);
+ bch_warn(c, "%s", buf.buf);
+ printbuf_exit(&buf);
+ }
+ return ret;
+}
+
/*
* XXX: this is handling transaction restarts without returning
* -BCH_ERR_transaction_restart_nested, this is not how we do things anymore:
@@ -346,14 +389,17 @@ static int reattach_inode(struct btree_trans *trans,
static int remove_backpointer(struct btree_trans *trans,
struct bch_inode_unpacked *inode)
{
- struct btree_iter iter;
- struct bkey_s_c_dirent d;
- int ret;
+ if (!inode->bi_dir)
+ return 0;
- d = bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_dirents,
- POS(inode->bi_dir, inode->bi_dir_offset), 0,
+ struct bch_fs *c = trans->c;
+ struct btree_iter iter;
+ struct bkey_s_c_dirent d =
+ bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_dirents,
+ SPOS(inode->bi_dir, inode->bi_dir_offset, inode->bi_snapshot), 0,
dirent);
- ret = bkey_err(d) ?:
+ int ret = bkey_err(d) ?:
+ dirent_points_to_inode(c, d, inode) ?:
__remove_dirent(trans, d.k->p);
bch2_trans_iter_exit(trans, &iter);
return ret;
@@ -371,7 +417,8 @@ static int reattach_subvol(struct btree_trans *trans, struct bkey_s_c_subvolume
return ret;
ret = remove_backpointer(trans, &inode);
- bch_err_msg(c, ret, "removing dirent");
+ if (!bch2_err_matches(ret, ENOENT))
+ bch_err_msg(c, ret, "removing dirent");
if (ret)
return ret;
@@ -626,12 +673,12 @@ static int ref_visible2(struct bch_fs *c,
struct inode_walker_entry {
struct bch_inode_unpacked inode;
u32 snapshot;
- bool seen_this_pos;
u64 count;
};
struct inode_walker {
bool first_this_inode;
+ bool have_inodes;
bool recalculate_sums;
struct bpos last_pos;
@@ -669,6 +716,12 @@ static int get_inodes_all_snapshots(struct btree_trans *trans,
struct bkey_s_c k;
int ret;
+ /*
+ * We no longer have inodes for w->last_pos; clear this to avoid
+ * screwing up check_i_sectors/check_subdir_count if we take a
+ * transaction restart here:
+ */
+ w->have_inodes = false;
w->recalculate_sums = false;
w->inodes.nr = 0;
@@ -686,6 +739,7 @@ static int get_inodes_all_snapshots(struct btree_trans *trans,
return ret;
w->first_this_inode = true;
+ w->have_inodes = true;
return 0;
}
@@ -740,9 +794,6 @@ static struct inode_walker_entry *walk_inode(struct btree_trans *trans,
int ret = get_inodes_all_snapshots(trans, w, k.k->p.inode);
if (ret)
return ERR_PTR(ret);
- } else if (bkey_cmp(w->last_pos, k.k->p)) {
- darray_for_each(w->inodes, i)
- i->seen_this_pos = false;
}
w->last_pos = k.k->p;
@@ -896,21 +947,6 @@ static struct bkey_s_c_dirent inode_get_dirent(struct btree_trans *trans,
return dirent_get_by_pos(trans, iter, SPOS(inode->bi_dir, inode->bi_dir_offset, *snapshot));
}
-static bool inode_points_to_dirent(struct bch_inode_unpacked *inode,
- struct bkey_s_c_dirent d)
-{
- return inode->bi_dir == d.k->p.inode &&
- inode->bi_dir_offset == d.k->p.offset;
-}
-
-static bool dirent_points_to_inode(struct bkey_s_c_dirent d,
- struct bch_inode_unpacked *inode)
-{
- return d.v->d_type == DT_SUBVOL
- ? le32_to_cpu(d.v->d_child_subvol) == inode->bi_subvol
- : le64_to_cpu(d.v->d_inum) == inode->bi_inum;
-}
-
static int check_inode_deleted_list(struct btree_trans *trans, struct bpos p)
{
struct btree_iter iter;
@@ -920,13 +956,14 @@ static int check_inode_deleted_list(struct btree_trans *trans, struct bpos p)
return ret;
}
-static int check_inode_dirent_inode(struct btree_trans *trans, struct bkey_s_c inode_k,
+static int check_inode_dirent_inode(struct btree_trans *trans,
struct bch_inode_unpacked *inode,
- u32 inode_snapshot, bool *write_inode)
+ bool *write_inode)
{
struct bch_fs *c = trans->c;
struct printbuf buf = PRINTBUF;
+ u32 inode_snapshot = inode->bi_snapshot;
struct btree_iter dirent_iter = {};
struct bkey_s_c_dirent d = inode_get_dirent(trans, &dirent_iter, inode, &inode_snapshot);
int ret = bkey_err(d);
@@ -936,13 +973,13 @@ static int check_inode_dirent_inode(struct btree_trans *trans, struct bkey_s_c i
if (fsck_err_on(ret,
trans, inode_points_to_missing_dirent,
"inode points to missing dirent\n%s",
- (bch2_bkey_val_to_text(&buf, c, inode_k), buf.buf)) ||
- fsck_err_on(!ret && !dirent_points_to_inode(d, inode),
+ (bch2_inode_unpacked_to_text(&buf, inode), buf.buf)) ||
+ fsck_err_on(!ret && dirent_points_to_inode_nowarn(d, inode),
trans, inode_points_to_wrong_dirent,
- "inode points to dirent that does not point back:\n%s",
- (bch2_bkey_val_to_text(&buf, c, inode_k),
- prt_newline(&buf),
- bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) {
+ "%s",
+ (printbuf_reset(&buf),
+ dirent_inode_mismatch_msg(&buf, c, d, inode),
+ buf.buf))) {
/*
* We just clear the backpointer fields for now. If we find a
* dirent that points to this inode in check_dirents(), we'll
@@ -963,7 +1000,7 @@ fsck_err:
return ret;
}
-static bool bch2_inode_open(struct bch_fs *c, struct bpos p)
+static bool bch2_inode_is_open(struct bch_fs *c, struct bpos p)
{
subvol_inum inum = {
.subvol = snapshot_t(c, p.snapshot)->subvol,
@@ -972,7 +1009,7 @@ static bool bch2_inode_open(struct bch_fs *c, struct bpos p)
/* snapshot tree corruption, can't safely delete */
if (!inum.subvol) {
- bch_err_ratelimited(c, "%s(): snapshot %u has no subvol", __func__, p.snapshot);
+ bch_warn_ratelimited(c, "%s(): snapshot %u has no subvol, unlinked but can't safely delete", __func__, p.snapshot);
return true;
}
@@ -1045,30 +1082,44 @@ static int check_inode(struct btree_trans *trans,
}
if (u.bi_flags & BCH_INODE_unlinked) {
- ret = check_inode_deleted_list(trans, k.k->p);
- if (ret < 0)
- return ret;
+ if (!test_bit(BCH_FS_started, &c->flags)) {
+ /*
+ * If we're not in online fsck, don't delete unlinked
+ * inodes, just make sure they're on the deleted list.
+ *
+ * They might be referred to by a logged operation -
+ * i.e. we might have crashed in the middle of a
+ * truncate on an unlinked but open file - so we want to
+ * let the delete_dead_inodes kill it after resuming
+ * logged ops.
+ */
+ ret = check_inode_deleted_list(trans, k.k->p);
+ if (ret < 0)
+ return ret;
- fsck_err_on(!ret,
- trans, unlinked_inode_not_on_deleted_list,
- "inode %llu:%u unlinked, but not on deleted list",
- u.bi_inum, k.k->p.snapshot);
- ret = 0;
- }
+ fsck_err_on(!ret,
+ trans, unlinked_inode_not_on_deleted_list,
+ "inode %llu:%u unlinked, but not on deleted list",
+ u.bi_inum, k.k->p.snapshot);
- if (u.bi_flags & BCH_INODE_unlinked &&
- !bch2_inode_open(c, k.k->p) &&
- (!c->sb.clean ||
- fsck_err(trans, inode_unlinked_but_clean,
- "filesystem marked clean, but inode %llu unlinked",
- u.bi_inum))) {
- ret = bch2_inode_rm_snapshot(trans, u.bi_inum, iter->pos.snapshot);
- bch_err_msg(c, ret, "in fsck deleting inode");
- return ret;
+ ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_deleted_inodes, k.k->p, 1);
+ if (ret)
+ goto err;
+ } else {
+ if (fsck_err_on(bch2_inode_is_open(c, k.k->p),
+ trans, inode_unlinked_and_not_open,
+ "inode %llu%u unlinked and not open",
+ u.bi_inum, u.bi_snapshot)) {
+ ret = bch2_inode_rm_snapshot(trans, u.bi_inum, iter->pos.snapshot);
+ bch_err_msg(c, ret, "in fsck deleting inode");
+ return ret;
+ }
+ }
}
+ /* i_size_dirty is vestigal, since we now have logged ops for truncate * */
if (u.bi_flags & BCH_INODE_i_size_dirty &&
- (!c->sb.clean ||
+ (!test_bit(BCH_FS_clean_recovery, &c->flags) ||
fsck_err(trans, inode_i_size_dirty_but_clean,
"filesystem marked clean, but inode %llu has i_size dirty",
u.bi_inum))) {
@@ -1097,8 +1148,9 @@ static int check_inode(struct btree_trans *trans,
do_update = true;
}
+ /* i_sectors_dirty is vestigal, i_sectors is always updated transactionally */
if (u.bi_flags & BCH_INODE_i_sectors_dirty &&
- (!c->sb.clean ||
+ (!test_bit(BCH_FS_clean_recovery, &c->flags) ||
fsck_err(trans, inode_i_sectors_dirty_but_clean,
"filesystem marked clean, but inode %llu has i_sectors dirty",
u.bi_inum))) {
@@ -1126,7 +1178,7 @@ static int check_inode(struct btree_trans *trans,
}
if (u.bi_dir || u.bi_dir_offset) {
- ret = check_inode_dirent_inode(trans, k, &u, k.k->p.snapshot, &do_update);
+ ret = check_inode_dirent_inode(trans, &u, &do_update);
if (ret)
goto err;
}
@@ -1555,10 +1607,10 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
struct bkey_s_c k,
struct inode_walker *inode,
struct snapshots_seen *s,
- struct extent_ends *extent_ends)
+ struct extent_ends *extent_ends,
+ struct disk_reservation *res)
{
struct bch_fs *c = trans->c;
- struct inode_walker_entry *i;
struct printbuf buf = PRINTBUF;
int ret = 0;
@@ -1568,7 +1620,7 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
goto out;
}
- if (inode->last_pos.inode != k.k->p.inode) {
+ if (inode->last_pos.inode != k.k->p.inode && inode->have_inodes) {
ret = check_i_sectors(trans, inode);
if (ret)
goto err;
@@ -1578,12 +1630,12 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
if (ret)
goto err;
- i = walk_inode(trans, inode, k);
- ret = PTR_ERR_OR_ZERO(i);
+ struct inode_walker_entry *extent_i = walk_inode(trans, inode, k);
+ ret = PTR_ERR_OR_ZERO(extent_i);
if (ret)
goto err;
- ret = check_key_has_inode(trans, iter, inode, i, k);
+ ret = check_key_has_inode(trans, iter, inode, extent_i, k);
if (ret)
goto err;
@@ -1592,24 +1644,19 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
&inode->recalculate_sums);
if (ret)
goto err;
- }
- /*
- * Check inodes in reverse order, from oldest snapshots to newest,
- * starting from the inode that matches this extent's snapshot. If we
- * didn't have one, iterate over all inodes:
- */
- if (!i)
- i = &darray_last(inode->inodes);
-
- for (;
- inode->inodes.data && i >= inode->inodes.data;
- --i) {
- if (i->snapshot > k.k->p.snapshot ||
- !key_visible_in_snapshot(c, s, i->snapshot, k.k->p.snapshot))
- continue;
+ /*
+ * Check inodes in reverse order, from oldest snapshots to
+ * newest, starting from the inode that matches this extent's
+ * snapshot. If we didn't have one, iterate over all inodes:
+ */
+ for (struct inode_walker_entry *i = extent_i ?: &darray_last(inode->inodes);
+ inode->inodes.data && i >= inode->inodes.data;
+ --i) {
+ if (i->snapshot > k.k->p.snapshot ||
+ !key_visible_in_snapshot(c, s, i->snapshot, k.k->p.snapshot))
+ continue;
- if (k.k->type != KEY_TYPE_whiteout) {
if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_size_dirty) &&
k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 &&
!bkey_extent_is_reservation(k),
@@ -1629,13 +1676,25 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
goto err;
iter->k.type = KEY_TYPE_whiteout;
+ break;
}
-
- if (bkey_extent_is_allocation(k.k))
- i->count += k.k->size;
}
+ }
- i->seen_this_pos = true;
+ ret = bch2_trans_commit(trans, res, NULL, BCH_TRANS_COMMIT_no_enospc);
+ if (ret)
+ goto err;
+
+ if (bkey_extent_is_allocation(k.k)) {
+ for (struct inode_walker_entry *i = extent_i ?: &darray_last(inode->inodes);
+ inode->inodes.data && i >= inode->inodes.data;
+ --i) {
+ if (i->snapshot > k.k->p.snapshot ||
+ !key_visible_in_snapshot(c, s, i->snapshot, k.k->p.snapshot))
+ continue;
+
+ i->count += k.k->size;
+ }
}
if (k.k->type != KEY_TYPE_whiteout) {
@@ -1666,13 +1725,11 @@ int bch2_check_extents(struct bch_fs *c)
extent_ends_init(&extent_ends);
int ret = bch2_trans_run(c,
- for_each_btree_key_commit(trans, iter, BTREE_ID_extents,
+ for_each_btree_key(trans, iter, BTREE_ID_extents,
POS(BCACHEFS_ROOT_INO, 0),
- BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
- &res, NULL,
- BCH_TRANS_COMMIT_no_enospc, ({
+ BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, ({
bch2_disk_reservation_put(c, &res);
- check_extent(trans, &iter, k, &w, &s, &extent_ends) ?:
+ check_extent(trans, &iter, k, &w, &s, &extent_ends, &res) ?:
check_extent_overbig(trans, &iter, k);
})) ?:
check_i_sectors_notnested(trans, &w));
@@ -1758,6 +1815,7 @@ static int check_dirent_inode_dirent(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct printbuf buf = PRINTBUF;
+ struct btree_iter bp_iter = { NULL };
int ret = 0;
if (inode_points_to_dirent(target, d))
@@ -1770,7 +1828,7 @@ static int check_dirent_inode_dirent(struct btree_trans *trans,
prt_printf(&buf, "\n "),
bch2_inode_unpacked_to_text(&buf, target),
buf.buf)))
- goto out_noiter;
+ goto err;
if (!target->bi_dir &&
!target->bi_dir_offset) {
@@ -1779,7 +1837,6 @@ static int check_dirent_inode_dirent(struct btree_trans *trans,
return __bch2_fsck_write_inode(trans, target, target_snapshot);
}
- struct btree_iter bp_iter = { NULL };
struct bkey_s_c_dirent bp_dirent = dirent_get_by_pos(trans, &bp_iter,
SPOS(target->bi_dir, target->bi_dir_offset, target_snapshot));
ret = bkey_err(bp_dirent);
@@ -1840,7 +1897,6 @@ out:
err:
fsck_err:
bch2_trans_iter_exit(trans, &bp_iter);
-out_noiter:
printbuf_exit(&buf);
bch_err_fn(c, ret);
return ret;
@@ -2075,7 +2131,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
if (k.k->type == KEY_TYPE_whiteout)
goto out;
- if (dir->last_pos.inode != k.k->p.inode) {
+ if (dir->last_pos.inode != k.k->p.inode && dir->have_inodes) {
ret = check_subdir_count(trans, dir);
if (ret)
goto err;
@@ -2137,11 +2193,15 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
if (ret)
goto err;
}
-
- if (d.v->d_type == DT_DIR)
- for_each_visible_inode(c, s, dir, d.k->p.snapshot, i)
- i->count++;
}
+
+ ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
+ if (ret)
+ goto err;
+
+ if (d.v->d_type == DT_DIR)
+ for_each_visible_inode(c, s, dir, d.k->p.snapshot, i)
+ i->count++;
out:
err:
fsck_err:
@@ -2164,12 +2224,9 @@ int bch2_check_dirents(struct bch_fs *c)
snapshots_seen_init(&s);
int ret = bch2_trans_run(c,
- for_each_btree_key_commit(trans, iter, BTREE_ID_dirents,
+ for_each_btree_key(trans, iter, BTREE_ID_dirents,
POS(BCACHEFS_ROOT_INO, 0),
- BTREE_ITER_prefetch|BTREE_ITER_all_snapshots,
- k,
- NULL, NULL,
- BCH_TRANS_COMMIT_no_enospc,
+ BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s)) ?:
check_subdir_count_notnested(trans, &dir));
@@ -2314,22 +2371,6 @@ static bool darray_u32_has(darray_u32 *d, u32 v)
return false;
}
-/*
- * We've checked that inode backpointers point to valid dirents; here, it's
- * sufficient to check that the subvolume root has a dirent:
- */
-static int subvol_has_dirent(struct btree_trans *trans, struct bkey_s_c_subvolume s)
-{
- struct bch_inode_unpacked inode;
- int ret = bch2_inode_find_by_inum_trans(trans,
- (subvol_inum) { s.k->p.offset, le64_to_cpu(s.v->inode) },
- &inode);
- if (ret)
- return ret;
-
- return inode.bi_dir != 0;
-}
-
static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k)
{
struct bch_fs *c = trans->c;
@@ -2348,14 +2389,24 @@ static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter,
struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k);
- ret = subvol_has_dirent(trans, s);
- if (ret < 0)
+ struct bch_inode_unpacked subvol_root;
+ ret = bch2_inode_find_by_inum_trans(trans,
+ (subvol_inum) { s.k->p.offset, le64_to_cpu(s.v->inode) },
+ &subvol_root);
+ if (ret)
break;
- if (fsck_err_on(!ret,
+ /*
+ * We've checked that inode backpointers point to valid dirents;
+ * here, it's sufficient to check that the subvolume root has a
+ * dirent:
+ */
+ if (fsck_err_on(!subvol_root.bi_dir,
trans, subvol_unreachable,
"unreachable subvolume %s",
(bch2_bkey_val_to_text(&buf, c, s.s_c),
+ prt_newline(&buf),
+ bch2_inode_unpacked_to_text(&buf, &subvol_root),
buf.buf))) {
ret = reattach_subvol(trans, s);
break;
@@ -2450,10 +2501,8 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino
if (ret && !bch2_err_matches(ret, ENOENT))
break;
- if (!ret && !dirent_points_to_inode(d, &inode)) {
+ if (!ret && (ret = dirent_points_to_inode(c, d, &inode)))
bch2_trans_iter_exit(trans, &dirent_iter);
- ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
- }
if (bch2_err_matches(ret, ENOENT)) {
ret = 0;
diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c
index 6ac0ff7e074b..753c208896c3 100644
--- a/fs/bcachefs/inode.c
+++ b/fs/bcachefs/inode.c
@@ -320,9 +320,11 @@ static noinline int bch2_inode_unpack_slowpath(struct bkey_s_c k,
int bch2_inode_unpack(struct bkey_s_c k,
struct bch_inode_unpacked *unpacked)
{
- if (likely(k.k->type == KEY_TYPE_inode_v3))
- return bch2_inode_unpack_v3(k, unpacked);
- return bch2_inode_unpack_slowpath(k, unpacked);
+ unpacked->bi_snapshot = k.k->p.snapshot;
+
+ return likely(k.k->type == KEY_TYPE_inode_v3)
+ ? bch2_inode_unpack_v3(k, unpacked)
+ : bch2_inode_unpack_slowpath(k, unpacked);
}
int bch2_inode_peek_nowarn(struct btree_trans *trans,
@@ -557,7 +559,7 @@ static void __bch2_inode_unpacked_to_text(struct printbuf *out,
void bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked *inode)
{
- prt_printf(out, "inum: %llu ", inode->bi_inum);
+ prt_printf(out, "inum: %llu:%u ", inode->bi_inum, inode->bi_snapshot);
__bch2_inode_unpacked_to_text(out, inode);
}
@@ -1111,7 +1113,7 @@ static int may_delete_deleted_inode(struct btree_trans *trans,
pos.offset, pos.snapshot))
goto delete;
- if (c->sb.clean &&
+ if (test_bit(BCH_FS_clean_recovery, &c->flags) &&
!fsck_err(trans, deleted_inode_but_clean,
"filesystem marked as clean but have deleted inode %llu:%u",
pos.offset, pos.snapshot)) {
diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h
index f1fcb4c58039..695abd707cb6 100644
--- a/fs/bcachefs/inode.h
+++ b/fs/bcachefs/inode.h
@@ -69,6 +69,7 @@ typedef u64 u96;
struct bch_inode_unpacked {
u64 bi_inum;
+ u32 bi_snapshot;
u64 bi_journal_seq;
__le64 bi_hash_seed;
u64 bi_size;
diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index b2f50e74bb76..e4fc17c548fd 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -517,7 +517,7 @@ static int __bch2_rbio_narrow_crcs(struct btree_trans *trans,
if ((ret = bkey_err(k)))
goto out;
- if (bversion_cmp(k.k->version, rbio->version) ||
+ if (bversion_cmp(k.k->bversion, rbio->version) ||
!bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, data_offset))
goto out;
@@ -1031,7 +1031,7 @@ get_bio:
rbio->read_pos = read_pos;
rbio->data_btree = data_btree;
rbio->data_pos = data_pos;
- rbio->version = k.k->version;
+ rbio->version = k.k->bversion;
rbio->promote = promote;
INIT_WORK(&rbio->work, NULL);
diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c
index d3b5be7fd9bf..b5fe9e0dc155 100644
--- a/fs/bcachefs/io_write.c
+++ b/fs/bcachefs/io_write.c
@@ -697,7 +697,7 @@ static void init_append_extent(struct bch_write_op *op,
e = bkey_extent_init(op->insert_keys.top);
e->k.p = op->pos;
e->k.size = crc.uncompressed_size;
- e->k.version = version;
+ e->k.bversion = version;
if (crc.csum_type ||
crc.compression_type ||
@@ -1544,7 +1544,7 @@ static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len)
id = bkey_inline_data_init(op->insert_keys.top);
id->k.p = op->pos;
- id->k.version = op->version;
+ id->k.bversion = op->version;
id->k.size = sectors;
iter = bio->bi_iter;
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index 30460bce04be..954f6a96e0f4 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -605,7 +605,7 @@ static int journal_entry_data_usage_validate(struct bch_fs *c,
goto out;
}
- if (journal_entry_err_on(bch2_replicas_entry_validate(&u->r, c->disk_sb.sb, &err),
+ if (journal_entry_err_on(bch2_replicas_entry_validate(&u->r, c, &err),
c, version, jset, entry,
journal_entry_data_usage_bad_size,
"invalid journal entry usage: %s", err.buf)) {
diff --git a/fs/bcachefs/logged_ops.c b/fs/bcachefs/logged_ops.c
index f49fdca1d07d..6f4a4e1083c9 100644
--- a/fs/bcachefs/logged_ops.c
+++ b/fs/bcachefs/logged_ops.c
@@ -37,6 +37,14 @@ static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter,
const struct bch_logged_op_fn *fn = logged_op_fn(k.k->type);
struct bkey_buf sk;
u32 restart_count = trans->restart_count;
+ struct printbuf buf = PRINTBUF;
+ int ret = 0;
+
+ fsck_err_on(test_bit(BCH_FS_clean_recovery, &c->flags),
+ trans, logged_op_but_clean,
+ "filesystem marked as clean but have logged op\n%s",
+ (bch2_bkey_val_to_text(&buf, c, k),
+ buf.buf));
if (!fn)
return 0;
@@ -47,8 +55,9 @@ static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter,
fn->resume(trans, sk.k);
bch2_bkey_buf_exit(&sk, c);
-
- return trans_was_restarted(trans, restart_count);
+fsck_err:
+ printbuf_exit(&buf);
+ return ret ?: trans_was_restarted(trans, restart_count);
}
int bch2_resume_logged_ops(struct bch_fs *c)
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index be1e7ca4362f..6db72d3bad7d 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -151,7 +151,7 @@ static int bch2_journal_replay_accounting_key(struct btree_trans *trans,
struct bkey_s_c old = bch2_btree_path_peek_slot(btree_iter_path(trans, &iter), &u);
/* Has this delta already been applied to the btree? */
- if (bversion_cmp(old.k->version, k->k->k.version) >= 0) {
+ if (bversion_cmp(old.k->bversion, k->k->k.bversion) >= 0) {
ret = 0;
goto out;
}
@@ -717,6 +717,8 @@ int bch2_fs_recovery(struct bch_fs *c)
if (c->opts.fsck)
set_bit(BCH_FS_fsck_running, &c->flags);
+ if (c->sb.clean)
+ set_bit(BCH_FS_clean_recovery, &c->flags);
ret = bch2_blacklist_table_initialize(c);
if (ret) {
@@ -862,6 +864,9 @@ use_clean:
clear_bit(BCH_FS_fsck_running, &c->flags);
+ /* in case we don't run journal replay, i.e. norecovery mode */
+ set_bit(BCH_FS_accounting_replay_done, &c->flags);
+
/* fsync if we fixed errors */
if (test_bit(BCH_FS_errors_fixed, &c->flags) &&
bch2_write_ref_tryget(c, BCH_WRITE_REF_fsync)) {
diff --git a/fs/bcachefs/recovery_passes_types.h b/fs/bcachefs/recovery_passes_types.h
index 8c7dee5983d2..50406ce0e4ef 100644
--- a/fs/bcachefs/recovery_passes_types.h
+++ b/fs/bcachefs/recovery_passes_types.h
@@ -50,7 +50,7 @@
x(check_directory_structure, 30, PASS_ONLINE|PASS_FSCK) \
x(check_nlinks, 31, PASS_FSCK) \
x(resume_logged_ops, 23, PASS_ALWAYS) \
- x(delete_dead_inodes, 32, PASS_FSCK|PASS_UNCLEAN) \
+ x(delete_dead_inodes, 32, PASS_ALWAYS) \
x(fix_reflink_p, 33, 0) \
x(set_fs_needs_rebalance, 34, 0) \
diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c
index e59c0abb4772..f457925fa362 100644
--- a/fs/bcachefs/reflink.c
+++ b/fs/bcachefs/reflink.c
@@ -367,7 +367,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans,
r_v->k.type = bkey_type_to_indirect(&orig->k);
r_v->k.p = reflink_iter.pos;
bch2_key_resize(&r_v->k, orig->k.size);
- r_v->k.version = orig->k.version;
+ r_v->k.bversion = orig->k.bversion;
set_bkey_val_bytes(&r_v->k, sizeof(__le64) + bkey_val_bytes(&orig->k));
diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c
index 998c0bd06802..bcb3276747e0 100644
--- a/fs/bcachefs/replicas.c
+++ b/fs/bcachefs/replicas.c
@@ -66,9 +66,9 @@ void bch2_replicas_entry_to_text(struct printbuf *out,
prt_printf(out, "]");
}
-int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *r,
- struct bch_sb *sb,
- struct printbuf *err)
+static int bch2_replicas_entry_validate_locked(struct bch_replicas_entry_v1 *r,
+ struct bch_sb *sb,
+ struct printbuf *err)
{
if (!r->nr_devs) {
prt_printf(err, "no devices in entry ");
@@ -94,6 +94,16 @@ bad:
return -BCH_ERR_invalid_replicas_entry;
}
+int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *r,
+ struct bch_fs *c,
+ struct printbuf *err)
+{
+ mutex_lock(&c->sb_lock);
+ int ret = bch2_replicas_entry_validate_locked(r, c->disk_sb.sb, err);
+ mutex_unlock(&c->sb_lock);
+ return ret;
+}
+
void bch2_cpu_replicas_to_text(struct printbuf *out,
struct bch_replicas_cpu *r)
{
@@ -676,7 +686,7 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r,
struct bch_replicas_entry_v1 *e =
cpu_replicas_entry(cpu_r, i);
- int ret = bch2_replicas_entry_validate(e, sb, err);
+ int ret = bch2_replicas_entry_validate_locked(e, sb, err);
if (ret)
return ret;
diff --git a/fs/bcachefs/replicas.h b/fs/bcachefs/replicas.h
index 622482559c3d..5aba2c1ce133 100644
--- a/fs/bcachefs/replicas.h
+++ b/fs/bcachefs/replicas.h
@@ -10,7 +10,7 @@ void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *);
void bch2_replicas_entry_to_text(struct printbuf *,
struct bch_replicas_entry_v1 *);
int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *,
- struct bch_sb *, struct printbuf *);
+ struct bch_fs *, struct printbuf *);
void bch2_cpu_replicas_to_text(struct printbuf *, struct bch_replicas_cpu *);
static inline struct bch_replicas_entry_v1 *
diff --git a/fs/bcachefs/sb-clean.c b/fs/bcachefs/sb-clean.c
index 025848a9c4c0..005275281804 100644
--- a/fs/bcachefs/sb-clean.c
+++ b/fs/bcachefs/sb-clean.c
@@ -167,6 +167,7 @@ struct bch_sb_field_clean *bch2_read_superblock_clean(struct bch_fs *c)
ret = bch2_sb_clean_validate_late(c, clean, READ);
if (ret) {
+ kfree(clean);
mutex_unlock(&c->sb_lock);
return ERR_PTR(ret);
}
diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c
index c7e4cdd3f6a5..5102059a0f1d 100644
--- a/fs/bcachefs/sb-downgrade.c
+++ b/fs/bcachefs/sb-downgrade.c
@@ -312,8 +312,7 @@ static void bch2_sb_downgrade_to_text(struct printbuf *out, struct bch_sb *sb,
if (!first)
prt_char(out, ',');
first = false;
- unsigned e = le16_to_cpu(i->errors[j]);
- prt_str(out, e < BCH_SB_ERR_MAX ? bch2_sb_error_strs[e] : "(unknown)");
+ bch2_sb_error_id_to_text(out, le16_to_cpu(i->errors[j]));
}
prt_newline(out);
}
@@ -353,7 +352,9 @@ int bch2_sb_downgrade_update(struct bch_fs *c)
for (unsigned i = 0; i < src->nr_errors; i++)
dst->errors[i] = cpu_to_le16(src->errors[i]);
- downgrade_table_extra(c, &table);
+ ret = downgrade_table_extra(c, &table);
+ if (ret)
+ goto out;
if (!dst->recovery_passes[0] &&
!dst->recovery_passes[1] &&
@@ -399,7 +400,7 @@ void bch2_sb_set_downgrade(struct bch_fs *c, unsigned new_minor, unsigned old_mi
for (unsigned j = 0; j < le16_to_cpu(i->nr_errors); j++) {
unsigned e = le16_to_cpu(i->errors[j]);
- if (e < BCH_SB_ERR_MAX)
+ if (e < BCH_FSCK_ERR_MAX)
__set_bit(e, c->sb.errors_silent);
if (e < sizeof(ext->errors_silent) * 8)
__set_bit_le64(e, ext->errors_silent);
diff --git a/fs/bcachefs/sb-errors.c b/fs/bcachefs/sb-errors.c
index c1270d790e43..013a96883b4e 100644
--- a/fs/bcachefs/sb-errors.c
+++ b/fs/bcachefs/sb-errors.c
@@ -7,12 +7,12 @@
const char * const bch2_sb_error_strs[] = {
#define x(t, n, ...) [n] = #t,
BCH_SB_ERRS()
- NULL
+#undef x
};
-static void bch2_sb_error_id_to_text(struct printbuf *out, enum bch_sb_error_id id)
+void bch2_sb_error_id_to_text(struct printbuf *out, enum bch_sb_error_id id)
{
- if (id < BCH_SB_ERR_MAX)
+ if (id < BCH_FSCK_ERR_MAX)
prt_str(out, bch2_sb_error_strs[id]);
else
prt_printf(out, "(unknown error %u)", id);
diff --git a/fs/bcachefs/sb-errors.h b/fs/bcachefs/sb-errors.h
index 8889001e7db4..b2357b8e6107 100644
--- a/fs/bcachefs/sb-errors.h
+++ b/fs/bcachefs/sb-errors.h
@@ -6,6 +6,8 @@
extern const char * const bch2_sb_error_strs[];
+void bch2_sb_error_id_to_text(struct printbuf *, enum bch_sb_error_id);
+
extern const struct bch_sb_field_ops bch_sb_field_ops_errors;
void bch2_sb_error_count(struct bch_fs *, enum bch_sb_error_id);
diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h
index f0c14702f9e6..ed5dca5e1161 100644
--- a/fs/bcachefs/sb-errors_format.h
+++ b/fs/bcachefs/sb-errors_format.h
@@ -210,22 +210,23 @@ enum bch_fsck_flags {
x(inode_snapshot_mismatch, 196, 0) \
x(inode_unlinked_but_clean, 197, 0) \
x(inode_unlinked_but_nlink_nonzero, 198, 0) \
+ x(inode_unlinked_and_not_open, 281, 0) \
x(inode_checksum_type_invalid, 199, 0) \
x(inode_compression_type_invalid, 200, 0) \
x(inode_subvol_root_but_not_dir, 201, 0) \
- x(inode_i_size_dirty_but_clean, 202, 0) \
- x(inode_i_sectors_dirty_but_clean, 203, 0) \
- x(inode_i_sectors_wrong, 204, 0) \
- x(inode_dir_wrong_nlink, 205, 0) \
- x(inode_dir_multiple_links, 206, 0) \
- x(inode_multiple_links_but_nlink_0, 207, 0) \
- x(inode_wrong_backpointer, 208, 0) \
- x(inode_wrong_nlink, 209, 0) \
- x(inode_unreachable, 210, 0) \
- x(deleted_inode_but_clean, 211, 0) \
- x(deleted_inode_missing, 212, 0) \
- x(deleted_inode_is_dir, 213, 0) \
- x(deleted_inode_not_unlinked, 214, 0) \
+ x(inode_i_size_dirty_but_clean, 202, FSCK_AUTOFIX) \
+ x(inode_i_sectors_dirty_but_clean, 203, FSCK_AUTOFIX) \
+ x(inode_i_sectors_wrong, 204, FSCK_AUTOFIX) \
+ x(inode_dir_wrong_nlink, 205, FSCK_AUTOFIX) \
+ x(inode_dir_multiple_links, 206, FSCK_AUTOFIX) \
+ x(inode_multiple_links_but_nlink_0, 207, FSCK_AUTOFIX) \
+ x(inode_wrong_backpointer, 208, FSCK_AUTOFIX) \
+ x(inode_wrong_nlink, 209, FSCK_AUTOFIX) \
+ x(inode_unreachable, 210, FSCK_AUTOFIX) \
+ x(deleted_inode_but_clean, 211, FSCK_AUTOFIX) \
+ x(deleted_inode_missing, 212, FSCK_AUTOFIX) \
+ x(deleted_inode_is_dir, 213, FSCK_AUTOFIX) \
+ x(deleted_inode_not_unlinked, 214, FSCK_AUTOFIX) \
x(extent_overlapping, 215, 0) \
x(key_in_missing_inode, 216, 0) \
x(key_in_wrong_inode_type, 217, 0) \
@@ -255,7 +256,7 @@ enum bch_fsck_flags {
x(dir_loop, 241, 0) \
x(hash_table_key_duplicate, 242, 0) \
x(hash_table_key_wrong_offset, 243, 0) \
- x(unlinked_inode_not_on_deleted_list, 244, 0) \
+ x(unlinked_inode_not_on_deleted_list, 244, FSCK_AUTOFIX) \
x(reflink_p_front_pad_bad, 245, 0) \
x(journal_entry_dup_same_device, 246, 0) \
x(inode_bi_subvol_missing, 247, 0) \
@@ -270,7 +271,7 @@ enum bch_fsck_flags {
x(subvol_children_not_set, 256, 0) \
x(subvol_children_bad, 257, 0) \
x(subvol_loop, 258, 0) \
- x(subvol_unreachable, 259, 0) \
+ x(subvol_unreachable, 259, FSCK_AUTOFIX) \
x(btree_node_bkey_bad_u64s, 260, 0) \
x(btree_node_topology_empty_interior_node, 261, 0) \
x(btree_ptr_v2_min_key_bad, 262, 0) \
@@ -282,8 +283,8 @@ enum bch_fsck_flags {
x(btree_ptr_v2_written_0, 268, 0) \
x(subvol_snapshot_bad, 269, 0) \
x(subvol_inode_bad, 270, 0) \
- x(alloc_key_stripe_sectors_wrong, 271, 0) \
- x(accounting_mismatch, 272, 0) \
+ x(alloc_key_stripe_sectors_wrong, 271, FSCK_AUTOFIX) \
+ x(accounting_mismatch, 272, FSCK_AUTOFIX) \
x(accounting_replicas_not_marked, 273, 0) \
x(invalid_btree_id, 274, 0) \
x(alloc_key_io_time_bad, 275, 0) \
@@ -292,12 +293,14 @@ enum bch_fsck_flags {
x(accounting_key_replicas_nr_devs_0, 278, FSCK_AUTOFIX) \
x(accounting_key_replicas_nr_required_bad, 279, FSCK_AUTOFIX) \
x(accounting_key_replicas_devs_unsorted, 280, FSCK_AUTOFIX) \
+ x(accounting_key_version_0, 282, FSCK_AUTOFIX) \
+ x(logged_op_but_clean, 283, FSCK_AUTOFIX) \
+ x(MAX, 284, 0)
enum bch_sb_error_id {
#define x(t, n, ...) BCH_FSCK_ERR_##t = n,
BCH_SB_ERRS()
#undef x
- BCH_SB_ERR_MAX
};
struct bch_sb_field_errors {
diff --git a/fs/bcachefs/six.c b/fs/bcachefs/six.c
index 9cbd3c14c94f..617d07e53b20 100644
--- a/fs/bcachefs/six.c
+++ b/fs/bcachefs/six.c
@@ -169,11 +169,17 @@ static int __do_six_trylock(struct six_lock *lock, enum six_lock_type type,
ret = -1 - SIX_LOCK_write;
}
} else if (type == SIX_LOCK_write && lock->readers) {
- if (try) {
+ if (try)
atomic_add(SIX_LOCK_HELD_write, &lock->state);
- smp_mb__after_atomic();
- }
+ /*
+ * Make sure atomic_add happens before pcpu_read_count and
+ * six_set_bitmask in slow path happens before pcpu_read_count.
+ *
+ * Paired with the smp_mb() in read lock fast path (per-cpu mode)
+ * and the one before atomic_read in read unlock path.
+ */
+ smp_mb();
ret = !pcpu_read_count(lock);
if (try && !ret) {
diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c
index 8b18a9b483a4..1809442b00ee 100644
--- a/fs/bcachefs/snapshot.c
+++ b/fs/bcachefs/snapshot.c
@@ -469,6 +469,7 @@ static u32 bch2_snapshot_tree_oldest_subvol(struct bch_fs *c, u32 snapshot_root)
u32 id = snapshot_root;
u32 subvol = 0, s;
+ rcu_read_lock();
while (id) {
s = snapshot_t(c, id)->subvol;
@@ -477,6 +478,7 @@ static u32 bch2_snapshot_tree_oldest_subvol(struct bch_fs *c, u32 snapshot_root)
id = bch2_snapshot_tree_next(c, id);
}
+ rcu_read_unlock();
return subvol;
}
@@ -1782,6 +1784,7 @@ static int bch2_propagate_key_to_snapshot_leaf(struct btree_trans *trans,
new->k.p.snapshot = leaf_id;
ret = bch2_trans_update(trans, &iter, new, 0);
out:
+ bch2_set_btree_iter_dontneed(&iter);
bch2_trans_iter_exit(trans, &iter);
return ret;
}
diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c
index dbe834cb349f..6845dde1b339 100644
--- a/fs/bcachefs/subvolume.c
+++ b/fs/bcachefs/subvolume.c
@@ -92,34 +92,32 @@ static int check_subvol(struct btree_trans *trans,
}
struct bch_inode_unpacked inode;
- struct btree_iter inode_iter = {};
- ret = bch2_inode_peek_nowarn(trans, &inode_iter, &inode,
+ ret = bch2_inode_find_by_inum_nowarn_trans(trans,
(subvol_inum) { k.k->p.offset, le64_to_cpu(subvol.v->inode) },
- 0);
- bch2_trans_iter_exit(trans, &inode_iter);
-
- if (ret && !bch2_err_matches(ret, ENOENT))
- return ret;
-
- if (fsck_err_on(ret,
- trans, subvol_to_missing_root,
- "subvolume %llu points to missing subvolume root %llu:%u",
- k.k->p.offset, le64_to_cpu(subvol.v->inode),
- le32_to_cpu(subvol.v->snapshot))) {
- ret = bch2_subvolume_delete(trans, iter->pos.offset);
- bch_err_msg(c, ret, "deleting subvolume %llu", iter->pos.offset);
- return ret ?: -BCH_ERR_transaction_restart_nested;
- }
-
- if (fsck_err_on(inode.bi_subvol != subvol.k->p.offset,
- trans, subvol_root_wrong_bi_subvol,
- "subvol root %llu:%u has wrong bi_subvol field: got %u, should be %llu",
- inode.bi_inum, inode_iter.k.p.snapshot,
- inode.bi_subvol, subvol.k->p.offset)) {
- inode.bi_subvol = subvol.k->p.offset;
- ret = __bch2_fsck_write_inode(trans, &inode, le32_to_cpu(subvol.v->snapshot));
- if (ret)
+ &inode);
+ if (!ret) {
+ if (fsck_err_on(inode.bi_subvol != subvol.k->p.offset,
+ trans, subvol_root_wrong_bi_subvol,
+ "subvol root %llu:%u has wrong bi_subvol field: got %u, should be %llu",
+ inode.bi_inum, inode.bi_snapshot,
+ inode.bi_subvol, subvol.k->p.offset)) {
+ inode.bi_subvol = subvol.k->p.offset;
+ ret = __bch2_fsck_write_inode(trans, &inode, le32_to_cpu(subvol.v->snapshot));
+ if (ret)
+ goto err;
+ }
+ } else if (bch2_err_matches(ret, ENOENT)) {
+ if (fsck_err(trans, subvol_to_missing_root,
+ "subvolume %llu points to missing subvolume root %llu:%u",
+ k.k->p.offset, le64_to_cpu(subvol.v->inode),
+ le32_to_cpu(subvol.v->snapshot))) {
+ ret = bch2_subvolume_delete(trans, iter->pos.offset);
+ bch_err_msg(c, ret, "deleting subvolume %llu", iter->pos.offset);
+ ret = ret ?: -BCH_ERR_transaction_restart_nested;
goto err;
+ }
+ } else {
+ goto err;
}
if (!BCH_SUBVOLUME_SNAP(subvol.v)) {
@@ -137,7 +135,7 @@ static int check_subvol(struct btree_trans *trans,
"%s: snapshot tree %u not found", __func__, snapshot_tree);
if (ret)
- return ret;
+ goto err;
if (fsck_err_on(le32_to_cpu(st.master_subvol) != subvol.k->p.offset,
trans, subvol_not_master_and_not_snapshot,
@@ -147,7 +145,7 @@ static int check_subvol(struct btree_trans *trans,
bch2_bkey_make_mut_typed(trans, iter, &subvol.s_c, 0, subvolume);
ret = PTR_ERR_OR_ZERO(s);
if (ret)
- return ret;
+ goto err;
SET_BCH_SUBVOLUME_SNAP(&s->v, true);
}
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index d86d5dae54c9..ce7410d72089 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -799,8 +799,10 @@ retry:
i < layout.sb_offset + layout.nr_superblocks; i++) {
offset = le64_to_cpu(*i);
- if (offset == opt_get(*opts, sb))
+ if (offset == opt_get(*opts, sb)) {
+ ret = -BCH_ERR_invalid;
continue;
+ }
ret = read_one_super(sb, offset, &err);
if (!ret)
@@ -1188,7 +1190,8 @@ static void bch2_sb_ext_to_text(struct printbuf *out, struct bch_sb *sb,
le_bitvector_to_cpu(errors_silent, (void *) e->errors_silent, sizeof(e->errors_silent) * 8);
prt_printf(out, "Errors to silently fix:\t");
- prt_bitflags_vector(out, bch2_sb_error_strs, errors_silent, sizeof(e->errors_silent) * 8);
+ prt_bitflags_vector(out, bch2_sb_error_strs, errors_silent,
+ min(BCH_FSCK_ERR_MAX, sizeof(e->errors_silent) * 8));
prt_newline(out);
kfree(errors_silent);
diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c
index 01b768c9b767..b2f209743afe 100644
--- a/fs/bcachefs/tests.c
+++ b/fs/bcachefs/tests.c
@@ -394,7 +394,7 @@ static int insert_test_extent(struct bch_fs *c,
k.k_i.k.p.offset = end;
k.k_i.k.p.snapshot = U32_MAX;
k.k_i.k.size = end - start;
- k.k_i.k.version.lo = test_version++;
+ k.k_i.k.bversion.lo = test_version++;
ret = bch2_btree_insert(c, BTREE_ID_extents, &k.k_i, NULL, 0, 0);
bch_err_fn(c, ret);
diff --git a/fs/bcachefs/thread_with_file.c b/fs/bcachefs/thread_with_file.c
index fb3442a7c67f..dea73bc1cb51 100644
--- a/fs/bcachefs/thread_with_file.c
+++ b/fs/bcachefs/thread_with_file.c
@@ -275,7 +275,6 @@ static long thread_with_stdio_ioctl(struct file *file, unsigned int cmd, unsigne
}
static const struct file_operations thread_with_stdio_fops = {
- .llseek = no_llseek,
.read = thread_with_stdio_read,
.write = thread_with_stdio_write,
.poll = thread_with_stdio_poll,
@@ -285,7 +284,6 @@ static const struct file_operations thread_with_stdio_fops = {
};
static const struct file_operations thread_with_stdout_fops = {
- .llseek = no_llseek,
.read = thread_with_stdio_read,
.poll = thread_with_stdout_poll,
.flush = thread_with_stdio_flush,
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 5d9ccda098cc..53fef258c2bc 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -96,7 +96,6 @@ static bool ceph_dirty_folio(struct address_space *mapping, struct folio *folio)
/* dirty the head */
spin_lock(&ci->i_ceph_lock);
- BUG_ON(ci->i_wr_ref == 0); // caller should hold Fw reference
if (__ceph_have_pending_cap_snap(ci)) {
struct ceph_cap_snap *capsnap =
list_last_entry(&ci->i_cap_snaps,
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 808c9c048276..bed34fc11c91 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -10,6 +10,7 @@
#include <linux/writeback.h>
#include <linux/iversion.h>
#include <linux/filelock.h>
+#include <linux/jiffies.h>
#include "super.h"
#include "mds_client.h"
@@ -4149,7 +4150,7 @@ retry:
ceph_remove_cap(mdsc, cap, false);
goto out_unlock;
} else if (tsession) {
- /* add placeholder for the export tagert */
+ /* add placeholder for the export target */
int flag = (cap == ci->i_auth_cap) ? CEPH_CAP_FLAG_AUTH : 0;
tcap = new_cap;
ceph_add_cap(inode, tsession, t_cap_id, issued, 0,
@@ -4602,7 +4603,7 @@ flush_cap_releases:
__ceph_queue_cap_release(session, cap);
spin_unlock(&session->s_cap_lock);
}
- ceph_flush_cap_releases(mdsc, session);
+ ceph_flush_session_cap_releases(mdsc, session);
goto done;
bad:
@@ -4659,7 +4660,7 @@ unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
* slowness doesn't block mdsc delayed work,
* preventing send_renew_caps() from running.
*/
- if (jiffies - loop_start >= 5 * HZ)
+ if (time_after_eq(jiffies, loop_start + 5 * HZ))
break;
}
spin_unlock(&mdsc->cap_delay_lock);
@@ -4701,6 +4702,28 @@ void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
ceph_mdsc_iterate_sessions(mdsc, flush_dirty_session_caps, true);
}
+/*
+ * Flush all cap releases to the mds
+ */
+static void flush_cap_releases(struct ceph_mds_session *s)
+{
+ struct ceph_mds_client *mdsc = s->s_mdsc;
+ struct ceph_client *cl = mdsc->fsc->client;
+
+ doutc(cl, "begin\n");
+ spin_lock(&s->s_cap_lock);
+ if (s->s_num_cap_releases)
+ ceph_flush_session_cap_releases(mdsc, s);
+ spin_unlock(&s->s_cap_lock);
+ doutc(cl, "done\n");
+
+}
+
+void ceph_flush_cap_releases(struct ceph_mds_client *mdsc)
+{
+ ceph_mdsc_iterate_sessions(mdsc, flush_cap_releases, true);
+}
+
void __ceph_touch_fmode(struct ceph_inode_info *ci,
struct ceph_mds_client *mdsc, int fmode)
{
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index ddec8c9244ee..952109292d69 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -2058,7 +2058,7 @@ static int ceph_d_delete(const struct dentry *dentry)
return 0;
if (ceph_snap(d_inode(dentry)) != CEPH_NOSNAP)
return 0;
- /* vaild lease? */
+ /* valid lease? */
di = ceph_dentry(dentry);
if (di) {
if (__dentry_lease_is_valid(di))
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 4a8eec46254b..315ef02f9a3f 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1779,7 +1779,7 @@ retry_lookup:
if (err < 0)
goto done;
} else if (rinfo->head->is_dentry && req->r_dentry) {
- /* parent inode is not locked, be carefull */
+ /* parent inode is not locked, be careful */
struct ceph_vino *ptvino = NULL;
dvino.ino = le64_to_cpu(rinfo->diri.in->ino);
dvino.snap = le64_to_cpu(rinfo->diri.in->snapid);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 276e34ab3e2c..c4a5fd94bbbb 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2266,7 +2266,7 @@ int ceph_trim_caps(struct ceph_mds_client *mdsc,
trim_caps - remaining);
}
- ceph_flush_cap_releases(mdsc, session);
+ ceph_flush_session_cap_releases(mdsc, session);
return 0;
}
@@ -2420,7 +2420,7 @@ static void ceph_cap_release_work(struct work_struct *work)
ceph_put_mds_session(session);
}
-void ceph_flush_cap_releases(struct ceph_mds_client *mdsc,
+void ceph_flush_session_cap_releases(struct ceph_mds_client *mdsc,
struct ceph_mds_session *session)
{
struct ceph_client *cl = mdsc->fsc->client;
@@ -2447,7 +2447,7 @@ void __ceph_queue_cap_release(struct ceph_mds_session *session,
session->s_num_cap_releases++;
if (!(session->s_num_cap_releases % CEPH_CAPS_PER_RELEASE))
- ceph_flush_cap_releases(session->s_mdsc, session);
+ ceph_flush_session_cap_releases(session->s_mdsc, session);
}
static void ceph_cap_reclaim_work(struct work_struct *work)
@@ -4340,7 +4340,7 @@ skip_cap_auths:
/* flush cap releases */
spin_lock(&session->s_cap_lock);
if (session->s_num_cap_releases)
- ceph_flush_cap_releases(mdsc, session);
+ ceph_flush_session_cap_releases(mdsc, session);
spin_unlock(&session->s_cap_lock);
send_flushmsg_ack(mdsc, session, seq);
@@ -4910,7 +4910,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
} else {
recon_state.msg_version = 2;
}
- /* trsaverse this session's caps */
+ /* traverse this session's caps */
err = ceph_iterate_session_caps(session, reconnect_caps_cb, &recon_state);
spin_lock(&session->s_cap_lock);
@@ -5446,7 +5446,7 @@ static void delayed_work(struct work_struct *work)
}
mutex_unlock(&mdsc->mutex);
- ceph_flush_cap_releases(mdsc, s);
+ ceph_flush_session_cap_releases(mdsc, s);
mutex_lock(&s->s_mutex);
if (renew_caps)
@@ -5877,6 +5877,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
mutex_unlock(&mdsc->mutex);
ceph_flush_dirty_caps(mdsc);
+ ceph_flush_cap_releases(mdsc);
spin_lock(&mdsc->cap_dirty_lock);
want_flush = mdsc->last_cap_flush_tid;
if (!list_empty(&mdsc->cap_flush_list)) {
@@ -6015,6 +6016,18 @@ static void ceph_mdsc_stop(struct ceph_mds_client *mdsc)
ceph_mdsmap_destroy(mdsc->mdsmap);
kfree(mdsc->sessions);
ceph_caps_finalize(mdsc);
+
+ if (mdsc->s_cap_auths) {
+ int i;
+
+ for (i = 0; i < mdsc->s_cap_auths_num; i++) {
+ kfree(mdsc->s_cap_auths[i].match.gids);
+ kfree(mdsc->s_cap_auths[i].match.path);
+ kfree(mdsc->s_cap_auths[i].match.fs_name);
+ }
+ kfree(mdsc->s_cap_auths);
+ }
+
ceph_pool_perm_destroy(mdsc);
}
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 9bcc7f181bfe..3dd54587944a 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -559,9 +559,6 @@ extern struct ceph_mds_session *
ceph_get_mds_session(struct ceph_mds_session *s);
extern void ceph_put_mds_session(struct ceph_mds_session *s);
-extern int ceph_send_msg_mds(struct ceph_mds_client *mdsc,
- struct ceph_msg *msg, int mds);
-
extern int ceph_mdsc_init(struct ceph_fs_client *fsc);
extern void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc);
extern void ceph_mdsc_force_umount(struct ceph_mds_client *mdsc);
@@ -602,8 +599,8 @@ extern void ceph_mdsc_iterate_sessions(struct ceph_mds_client *mdsc,
extern struct ceph_msg *ceph_create_session_msg(u32 op, u64 seq);
extern void __ceph_queue_cap_release(struct ceph_mds_session *session,
struct ceph_cap *cap);
-extern void ceph_flush_cap_releases(struct ceph_mds_client *mdsc,
- struct ceph_mds_session *session);
+extern void ceph_flush_session_cap_releases(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session);
extern void ceph_queue_cap_reclaim_work(struct ceph_mds_client *mdsc);
extern void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr);
extern void ceph_queue_cap_unlink_work(struct ceph_mds_client *mdsc);
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 0cdf84cd1791..73f321b52895 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -126,6 +126,7 @@ static int ceph_sync_fs(struct super_block *sb, int wait)
if (!wait) {
doutc(cl, "(non-blocking)\n");
ceph_flush_dirty_caps(fsc->mdsc);
+ ceph_flush_cap_releases(fsc->mdsc);
doutc(cl, "(non-blocking) done\n");
return 0;
}
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 6e817bf1337c..2508aa8950b7 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -1056,8 +1056,6 @@ extern int ceph_fill_trace(struct super_block *sb,
extern int ceph_readdir_prepopulate(struct ceph_mds_request *req,
struct ceph_mds_session *session);
-extern int ceph_inode_holds_cap(struct inode *inode, int mask);
-
extern bool ceph_inode_set_size(struct inode *inode, loff_t size);
extern void __ceph_do_pending_vmtruncate(struct inode *inode);
@@ -1208,10 +1206,6 @@ static inline void ceph_init_inode_acls(struct inode *inode,
struct ceph_acl_sec_ctx *as_ctx)
{
}
-static inline int ceph_acl_chmod(struct dentry *dentry, struct inode *inode)
-{
- return 0;
-}
static inline void ceph_forget_all_cached_acls(struct inode *inode)
{
@@ -1270,6 +1264,7 @@ extern bool __ceph_should_report_size(struct ceph_inode_info *ci);
extern void ceph_check_caps(struct ceph_inode_info *ci, int flags);
extern unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc);
extern void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc);
+extern void ceph_flush_cap_releases(struct ceph_mds_client *mdsc);
extern int ceph_drop_caps_for_unlink(struct inode *inode);
extern int ceph_encode_inode_release(void **p, struct inode *inode,
int mds, int drop, int unless, int force);
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index c6f4a9a98b85..67299e8b734e 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -1218,7 +1218,6 @@ static const struct file_operations u32_array_fops = {
.open = u32_array_open,
.release = u32_array_release,
.read = u32_array_read,
- .llseek = no_llseek,
};
/**
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index 7112958c2e5b..700a0cbb2f14 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -733,7 +733,6 @@ out:
static const struct file_operations dlm_rawmsg_fops = {
.open = simple_open,
.write = dlm_rawmsg_write,
- .llseek = no_llseek,
};
void *dlm_create_debug_comms_file(int nodeid, void *data)
diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c
index 7e9961639802..23c51d62f902 100644
--- a/fs/efivarfs/file.c
+++ b/fs/efivarfs/file.c
@@ -110,5 +110,4 @@ const struct file_operations efivarfs_file_operations = {
.open = simple_open,
.read = efivarfs_file_read,
.write = efivarfs_file_write,
- .llseek = no_llseek,
};
diff --git a/fs/fsopen.c b/fs/fsopen.c
index ee92ca58429e..6cef3deccded 100644
--- a/fs/fsopen.c
+++ b/fs/fsopen.c
@@ -78,7 +78,6 @@ static int fscontext_release(struct inode *inode, struct file *file)
const struct file_operations fscontext_fops = {
.read = fscontext_read,
.release = fscontext_release,
- .llseek = no_llseek,
};
/*
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 97ac994ff78f..2a730d88cc3b 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -183,27 +183,23 @@ out:
static const struct file_operations fuse_ctl_abort_ops = {
.open = nonseekable_open,
.write = fuse_conn_abort_write,
- .llseek = no_llseek,
};
static const struct file_operations fuse_ctl_waiting_ops = {
.open = nonseekable_open,
.read = fuse_conn_waiting_read,
- .llseek = no_llseek,
};
static const struct file_operations fuse_conn_max_background_ops = {
.open = nonseekable_open,
.read = fuse_conn_max_background_read,
.write = fuse_conn_max_background_write,
- .llseek = no_llseek,
};
static const struct file_operations fuse_conn_congestion_threshold_ops = {
.open = nonseekable_open,
.read = fuse_conn_congestion_threshold_read,
.write = fuse_conn_congestion_threshold_write,
- .llseek = no_llseek,
};
static struct dentry *fuse_ctl_add_dentry(struct dentry *parent,
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 46ed30a4e0fc..1f64ae6d7a69 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -2456,7 +2456,6 @@ static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
const struct file_operations fuse_dev_operations = {
.owner = THIS_MODULE,
.open = fuse_dev_open,
- .llseek = no_llseek,
.read_iter = fuse_dev_read,
.splice_read = fuse_dev_splice_read,
.write_iter = fuse_dev_write,
diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h
index c9f0ed24cb7b..c562aec3b483 100644
--- a/fs/netfs/internal.h
+++ b/fs/netfs/internal.h
@@ -58,6 +58,7 @@ static inline void netfs_proc_del_rreq(struct netfs_io_request *rreq) {}
/*
* misc.c
*/
+struct folio_queue *netfs_buffer_make_space(struct netfs_io_request *rreq);
int netfs_buffer_append_folio(struct netfs_io_request *rreq, struct folio *folio,
bool needs_put);
struct folio_queue *netfs_delete_buffer_head(struct netfs_io_request *wreq);
diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c
index 0ad0982ce0e2..63280791de3b 100644
--- a/fs/netfs/misc.c
+++ b/fs/netfs/misc.c
@@ -9,34 +9,66 @@
#include "internal.h"
/*
- * Append a folio to the rolling queue.
+ * Make sure there's space in the rolling queue.
*/
-int netfs_buffer_append_folio(struct netfs_io_request *rreq, struct folio *folio,
- bool needs_put)
+struct folio_queue *netfs_buffer_make_space(struct netfs_io_request *rreq)
{
- struct folio_queue *tail = rreq->buffer_tail;
- unsigned int slot, order = folio_order(folio);
+ struct folio_queue *tail = rreq->buffer_tail, *prev;
+ unsigned int prev_nr_slots = 0;
if (WARN_ON_ONCE(!rreq->buffer && tail) ||
WARN_ON_ONCE(rreq->buffer && !tail))
- return -EIO;
-
- if (!tail || folioq_full(tail)) {
- tail = kmalloc(sizeof(*tail), GFP_NOFS);
- if (!tail)
- return -ENOMEM;
- netfs_stat(&netfs_n_folioq);
- folioq_init(tail);
- tail->prev = rreq->buffer_tail;
- if (tail->prev)
- tail->prev->next = tail;
- rreq->buffer_tail = tail;
- if (!rreq->buffer) {
- rreq->buffer = tail;
- iov_iter_folio_queue(&rreq->io_iter, ITER_SOURCE, tail, 0, 0, 0);
+ return ERR_PTR(-EIO);
+
+ prev = tail;
+ if (prev) {
+ if (!folioq_full(tail))
+ return tail;
+ prev_nr_slots = folioq_nr_slots(tail);
+ }
+
+ tail = kmalloc(sizeof(*tail), GFP_NOFS);
+ if (!tail)
+ return ERR_PTR(-ENOMEM);
+ netfs_stat(&netfs_n_folioq);
+ folioq_init(tail);
+ tail->prev = prev;
+ if (prev)
+ /* [!] NOTE: After we set prev->next, the consumer is entirely
+ * at liberty to delete prev.
+ */
+ WRITE_ONCE(prev->next, tail);
+
+ rreq->buffer_tail = tail;
+ if (!rreq->buffer) {
+ rreq->buffer = tail;
+ iov_iter_folio_queue(&rreq->io_iter, ITER_SOURCE, tail, 0, 0, 0);
+ } else {
+ /* Make sure we don't leave the master iterator pointing to a
+ * block that might get immediately consumed.
+ */
+ if (rreq->io_iter.folioq == prev &&
+ rreq->io_iter.folioq_slot == prev_nr_slots) {
+ rreq->io_iter.folioq = tail;
+ rreq->io_iter.folioq_slot = 0;
}
- rreq->buffer_tail_slot = 0;
}
+ rreq->buffer_tail_slot = 0;
+ return tail;
+}
+
+/*
+ * Append a folio to the rolling queue.
+ */
+int netfs_buffer_append_folio(struct netfs_io_request *rreq, struct folio *folio,
+ bool needs_put)
+{
+ struct folio_queue *tail;
+ unsigned int slot, order = folio_order(folio);
+
+ tail = netfs_buffer_make_space(rreq);
+ if (IS_ERR(tail))
+ return PTR_ERR(tail);
rreq->io_iter.count += PAGE_SIZE << order;
diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c
index 04e66d587f77..0929d9fd4ce7 100644
--- a/fs/netfs/write_issue.c
+++ b/fs/netfs/write_issue.c
@@ -153,12 +153,22 @@ static void netfs_prepare_write(struct netfs_io_request *wreq,
loff_t start)
{
struct netfs_io_subrequest *subreq;
+ struct iov_iter *wreq_iter = &wreq->io_iter;
+
+ /* Make sure we don't point the iterator at a used-up folio_queue
+ * struct being used as a placeholder to prevent the queue from
+ * collapsing. In such a case, extend the queue.
+ */
+ if (iov_iter_is_folioq(wreq_iter) &&
+ wreq_iter->folioq_slot >= folioq_nr_slots(wreq_iter->folioq)) {
+ netfs_buffer_make_space(wreq);
+ }
subreq = netfs_alloc_subrequest(wreq);
subreq->source = stream->source;
subreq->start = start;
subreq->stream_nr = stream->stream_nr;
- subreq->io_iter = wreq->io_iter;
+ subreq->io_iter = *wreq_iter;
_enter("R=%x[%x]", wreq->debug_id, subreq->debug_index);
diff --git a/fs/nsfs.c b/fs/nsfs.c
index 67ee176b8824..c675fc40ce2d 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -22,7 +22,6 @@ static struct vfsmount *nsfs_mnt;
static long ns_ioctl(struct file *filp, unsigned int ioctl,
unsigned long arg);
static const struct file_operations ns_file_operations = {
- .llseek = no_llseek,
.unlocked_ioctl = ns_ioctl,
.compat_ioctl = compat_ptr_ioctl,
};
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 1fea43c33b6b..db72b3e924b3 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -156,9 +156,8 @@ int ocfs2_get_block(struct inode *inode, sector_t iblock,
err = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, &count,
&ext_flags);
if (err) {
- mlog(ML_ERROR, "Error %d from get_blocks(0x%p, %llu, 1, "
- "%llu, NULL)\n", err, inode, (unsigned long long)iblock,
- (unsigned long long)p_blkno);
+ mlog(ML_ERROR, "get_blocks() failed, inode: 0x%p, "
+ "block: %llu\n", inode, (unsigned long long)iblock);
goto bail;
}
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index 70a768b623cf..f7672472fa82 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -973,7 +973,13 @@ int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
}
while (done < nr) {
- down_read(&OCFS2_I(inode)->ip_alloc_sem);
+ if (!down_read_trylock(&OCFS2_I(inode)->ip_alloc_sem)) {
+ rc = -EAGAIN;
+ mlog(ML_ERROR,
+ "Inode #%llu ip_alloc_sem is temporarily unavailable\n",
+ (unsigned long long)OCFS2_I(inode)->ip_blkno);
+ break;
+ }
rc = ocfs2_extent_map_get_blocks(inode, v_block + done,
&p_block, &p_count, NULL);
up_read(&OCFS2_I(inode)->ip_alloc_sem);
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 4f85508538fc..004393b13c0a 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -25,6 +25,7 @@
#include "namei.h"
#include "ocfs2_trace.h"
#include "file.h"
+#include "symlink.h"
#include <linux/bio.h>
#include <linux/blkdev.h>
@@ -4148,8 +4149,9 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
int ret;
struct inode *inode = d_inode(old_dentry);
struct buffer_head *new_bh = NULL;
+ struct ocfs2_inode_info *oi = OCFS2_I(inode);
- if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE) {
+ if (oi->ip_flags & OCFS2_INODE_SYSTEM_FILE) {
ret = -EINVAL;
mlog_errno(ret);
goto out;
@@ -4175,6 +4177,26 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
goto out_unlock;
}
+ if ((oi->ip_dyn_features & OCFS2_HAS_XATTR_FL) &&
+ (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
+ /*
+ * Adjust extent record count to reserve space for extended attribute.
+ * Inline data count had been adjusted in ocfs2_duplicate_inline_data().
+ */
+ struct ocfs2_inode_info *new_oi = OCFS2_I(new_inode);
+
+ if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) &&
+ !(ocfs2_inode_is_fast_symlink(new_inode))) {
+ struct ocfs2_dinode *new_di = (struct ocfs2_dinode *)new_bh->b_data;
+ struct ocfs2_dinode *old_di = (struct ocfs2_dinode *)old_bh->b_data;
+ struct ocfs2_extent_list *el = &new_di->id2.i_list;
+ int inline_size = le16_to_cpu(old_di->i_xattr_inline_size);
+
+ le16_add_cpu(&el->l_count, -(inline_size /
+ sizeof(struct ocfs2_extent_rec)));
+ }
+ }
+
ret = ocfs2_create_reflink_node(inode, old_bh,
new_inode, new_bh, preserve);
if (ret) {
@@ -4182,7 +4204,7 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
goto inode_unlock;
}
- if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_XATTR_FL) {
+ if (oi->ip_dyn_features & OCFS2_HAS_XATTR_FL) {
ret = ocfs2_reflink_xattrs(inode, old_bh,
new_inode, new_bh,
preserve);
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 0e58a5ce539e..dd0a05365e79 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -6511,16 +6511,7 @@ static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args)
}
new_oi = OCFS2_I(args->new_inode);
- /*
- * Adjust extent record count to reserve space for extended attribute.
- * Inline data count had been adjusted in ocfs2_duplicate_inline_data().
- */
- if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) &&
- !(ocfs2_inode_is_fast_symlink(args->new_inode))) {
- struct ocfs2_extent_list *el = &new_di->id2.i_list;
- le16_add_cpu(&el->l_count, -(inline_size /
- sizeof(struct ocfs2_extent_rec)));
- }
+
spin_lock(&new_oi->ip_lock);
new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL;
new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 2b7a5a3a7a2f..4504493b20be 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -117,7 +117,7 @@ static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
struct file *f = ovl_open_realfile(file, &realpath);
if (IS_ERR(f))
return PTR_ERR(f);
- real->word = (unsigned long)ovl_open_realfile(file, &realpath) | FDPUT_FPUT;
+ real->word = (unsigned long)f | FDPUT_FPUT;
return 0;
}
diff --git a/fs/pipe.c b/fs/pipe.c
index 4083ba492cb6..12b22c2723b7 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1231,7 +1231,6 @@ err:
const struct file_operations pipefifo_fops = {
.open = fifo_open,
- .llseek = no_llseek,
.read_iter = pipe_read,
.write_iter = pipe_write,
.poll = pipe_poll,
diff --git a/fs/smb/client/cifsencrypt.c b/fs/smb/client/cifsencrypt.c
index 7481b21a0489..2d851f596a72 100644
--- a/fs/smb/client/cifsencrypt.c
+++ b/fs/smb/client/cifsencrypt.c
@@ -416,7 +416,7 @@ find_timestamp(struct cifs_ses *ses)
}
static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
- const struct nls_table *nls_cp)
+ const struct nls_table *nls_cp, struct shash_desc *hmacmd5)
{
int rc = 0;
int len;
@@ -425,34 +425,26 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
wchar_t *domain;
wchar_t *server;
- if (!ses->server->secmech.hmacmd5) {
- cifs_dbg(VFS, "%s: can't generate ntlmv2 hash\n", __func__);
- return -1;
- }
-
/* calculate md4 hash of password */
E_md4hash(ses->password, nt_hash, nls_cp);
- rc = crypto_shash_setkey(ses->server->secmech.hmacmd5->tfm, nt_hash,
- CIFS_NTHASH_SIZE);
+ rc = crypto_shash_setkey(hmacmd5->tfm, nt_hash, CIFS_NTHASH_SIZE);
if (rc) {
- cifs_dbg(VFS, "%s: Could not set NT Hash as a key\n", __func__);
+ cifs_dbg(VFS, "%s: Could not set NT hash as a key, rc=%d\n", __func__, rc);
return rc;
}
- rc = crypto_shash_init(ses->server->secmech.hmacmd5);
+ rc = crypto_shash_init(hmacmd5);
if (rc) {
- cifs_dbg(VFS, "%s: Could not init hmacmd5\n", __func__);
+ cifs_dbg(VFS, "%s: Could not init HMAC-MD5, rc=%d\n", __func__, rc);
return rc;
}
/* convert ses->user_name to unicode */
len = ses->user_name ? strlen(ses->user_name) : 0;
user = kmalloc(2 + (len * 2), GFP_KERNEL);
- if (user == NULL) {
- rc = -ENOMEM;
- return rc;
- }
+ if (user == NULL)
+ return -ENOMEM;
if (len) {
len = cifs_strtoUTF16(user, ses->user_name, len, nls_cp);
@@ -461,11 +453,10 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
*(u16 *)user = 0;
}
- rc = crypto_shash_update(ses->server->secmech.hmacmd5,
- (char *)user, 2 * len);
+ rc = crypto_shash_update(hmacmd5, (char *)user, 2 * len);
kfree(user);
if (rc) {
- cifs_dbg(VFS, "%s: Could not update with user\n", __func__);
+ cifs_dbg(VFS, "%s: Could not update with user, rc=%d\n", __func__, rc);
return rc;
}
@@ -474,19 +465,15 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
len = strlen(ses->domainName);
domain = kmalloc(2 + (len * 2), GFP_KERNEL);
- if (domain == NULL) {
- rc = -ENOMEM;
- return rc;
- }
+ if (domain == NULL)
+ return -ENOMEM;
+
len = cifs_strtoUTF16((__le16 *)domain, ses->domainName, len,
nls_cp);
- rc =
- crypto_shash_update(ses->server->secmech.hmacmd5,
- (char *)domain, 2 * len);
+ rc = crypto_shash_update(hmacmd5, (char *)domain, 2 * len);
kfree(domain);
if (rc) {
- cifs_dbg(VFS, "%s: Could not update with domain\n",
- __func__);
+ cifs_dbg(VFS, "%s: Could not update with domain, rc=%d\n", __func__, rc);
return rc;
}
} else {
@@ -494,33 +481,27 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
len = strlen(ses->ip_addr);
server = kmalloc(2 + (len * 2), GFP_KERNEL);
- if (server == NULL) {
- rc = -ENOMEM;
- return rc;
- }
- len = cifs_strtoUTF16((__le16 *)server, ses->ip_addr, len,
- nls_cp);
- rc =
- crypto_shash_update(ses->server->secmech.hmacmd5,
- (char *)server, 2 * len);
+ if (server == NULL)
+ return -ENOMEM;
+
+ len = cifs_strtoUTF16((__le16 *)server, ses->ip_addr, len, nls_cp);
+ rc = crypto_shash_update(hmacmd5, (char *)server, 2 * len);
kfree(server);
if (rc) {
- cifs_dbg(VFS, "%s: Could not update with server\n",
- __func__);
+ cifs_dbg(VFS, "%s: Could not update with server, rc=%d\n", __func__, rc);
return rc;
}
}
- rc = crypto_shash_final(ses->server->secmech.hmacmd5,
- ntlmv2_hash);
+ rc = crypto_shash_final(hmacmd5, ntlmv2_hash);
if (rc)
- cifs_dbg(VFS, "%s: Could not generate md5 hash\n", __func__);
+ cifs_dbg(VFS, "%s: Could not generate MD5 hash, rc=%d\n", __func__, rc);
return rc;
}
static int
-CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash)
+CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash, struct shash_desc *hmacmd5)
{
int rc;
struct ntlmv2_resp *ntlmv2 = (struct ntlmv2_resp *)
@@ -531,43 +512,33 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash)
hash_len = ses->auth_key.len - (CIFS_SESS_KEY_SIZE +
offsetof(struct ntlmv2_resp, challenge.key[0]));
- if (!ses->server->secmech.hmacmd5) {
- cifs_dbg(VFS, "%s: can't generate ntlmv2 hash\n", __func__);
- return -1;
- }
-
- rc = crypto_shash_setkey(ses->server->secmech.hmacmd5->tfm,
- ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE);
+ rc = crypto_shash_setkey(hmacmd5->tfm, ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE);
if (rc) {
- cifs_dbg(VFS, "%s: Could not set NTLMV2 Hash as a key\n",
- __func__);
+ cifs_dbg(VFS, "%s: Could not set NTLMv2 hash as a key, rc=%d\n", __func__, rc);
return rc;
}
- rc = crypto_shash_init(ses->server->secmech.hmacmd5);
+ rc = crypto_shash_init(hmacmd5);
if (rc) {
- cifs_dbg(VFS, "%s: Could not init hmacmd5\n", __func__);
+ cifs_dbg(VFS, "%s: Could not init HMAC-MD5, rc=%d\n", __func__, rc);
return rc;
}
if (ses->server->negflavor == CIFS_NEGFLAVOR_EXTENDED)
- memcpy(ntlmv2->challenge.key,
- ses->ntlmssp->cryptkey, CIFS_SERVER_CHALLENGE_SIZE);
+ memcpy(ntlmv2->challenge.key, ses->ntlmssp->cryptkey, CIFS_SERVER_CHALLENGE_SIZE);
else
- memcpy(ntlmv2->challenge.key,
- ses->server->cryptkey, CIFS_SERVER_CHALLENGE_SIZE);
- rc = crypto_shash_update(ses->server->secmech.hmacmd5,
- ntlmv2->challenge.key, hash_len);
+ memcpy(ntlmv2->challenge.key, ses->server->cryptkey, CIFS_SERVER_CHALLENGE_SIZE);
+
+ rc = crypto_shash_update(hmacmd5, ntlmv2->challenge.key, hash_len);
if (rc) {
- cifs_dbg(VFS, "%s: Could not update with response\n", __func__);
+ cifs_dbg(VFS, "%s: Could not update with response, rc=%d\n", __func__, rc);
return rc;
}
/* Note that the MD5 digest over writes anon.challenge_key.key */
- rc = crypto_shash_final(ses->server->secmech.hmacmd5,
- ntlmv2->ntlmv2_hash);
+ rc = crypto_shash_final(hmacmd5, ntlmv2->ntlmv2_hash);
if (rc)
- cifs_dbg(VFS, "%s: Could not generate md5 hash\n", __func__);
+ cifs_dbg(VFS, "%s: Could not generate MD5 hash, rc=%d\n", __func__, rc);
return rc;
}
@@ -575,6 +546,7 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash)
int
setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
{
+ struct shash_desc *hmacmd5 = NULL;
int rc;
int baselen;
unsigned int tilen;
@@ -640,55 +612,51 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
cifs_server_lock(ses->server);
- rc = cifs_alloc_hash("hmac(md5)", &ses->server->secmech.hmacmd5);
+ rc = cifs_alloc_hash("hmac(md5)", &hmacmd5);
if (rc) {
+ cifs_dbg(VFS, "Could not allocate HMAC-MD5, rc=%d\n", rc);
goto unlock;
}
/* calculate ntlmv2_hash */
- rc = calc_ntlmv2_hash(ses, ntlmv2_hash, nls_cp);
+ rc = calc_ntlmv2_hash(ses, ntlmv2_hash, nls_cp, hmacmd5);
if (rc) {
- cifs_dbg(VFS, "Could not get v2 hash rc %d\n", rc);
+ cifs_dbg(VFS, "Could not get NTLMv2 hash, rc=%d\n", rc);
goto unlock;
}
/* calculate first part of the client response (CR1) */
- rc = CalcNTLMv2_response(ses, ntlmv2_hash);
+ rc = CalcNTLMv2_response(ses, ntlmv2_hash, hmacmd5);
if (rc) {
- cifs_dbg(VFS, "Could not calculate CR1 rc: %d\n", rc);
+ cifs_dbg(VFS, "Could not calculate CR1, rc=%d\n", rc);
goto unlock;
}
/* now calculate the session key for NTLMv2 */
- rc = crypto_shash_setkey(ses->server->secmech.hmacmd5->tfm,
- ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE);
+ rc = crypto_shash_setkey(hmacmd5->tfm, ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE);
if (rc) {
- cifs_dbg(VFS, "%s: Could not set NTLMV2 Hash as a key\n",
- __func__);
+ cifs_dbg(VFS, "%s: Could not set NTLMv2 hash as a key, rc=%d\n", __func__, rc);
goto unlock;
}
- rc = crypto_shash_init(ses->server->secmech.hmacmd5);
+ rc = crypto_shash_init(hmacmd5);
if (rc) {
- cifs_dbg(VFS, "%s: Could not init hmacmd5\n", __func__);
+ cifs_dbg(VFS, "%s: Could not init HMAC-MD5, rc=%d\n", __func__, rc);
goto unlock;
}
- rc = crypto_shash_update(ses->server->secmech.hmacmd5,
- ntlmv2->ntlmv2_hash,
- CIFS_HMAC_MD5_HASH_SIZE);
+ rc = crypto_shash_update(hmacmd5, ntlmv2->ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE);
if (rc) {
- cifs_dbg(VFS, "%s: Could not update with response\n", __func__);
+ cifs_dbg(VFS, "%s: Could not update with response, rc=%d\n", __func__, rc);
goto unlock;
}
- rc = crypto_shash_final(ses->server->secmech.hmacmd5,
- ses->auth_key.response);
+ rc = crypto_shash_final(hmacmd5, ses->auth_key.response);
if (rc)
- cifs_dbg(VFS, "%s: Could not generate md5 hash\n", __func__);
-
+ cifs_dbg(VFS, "%s: Could not generate MD5 hash, rc=%d\n", __func__, rc);
unlock:
cifs_server_unlock(ses->server);
+ cifs_free_hash(&hmacmd5);
setup_ntlmv2_rsp_ret:
kfree_sensitive(tiblob);
@@ -732,16 +700,19 @@ cifs_crypto_secmech_release(struct TCP_Server_Info *server)
cifs_free_hash(&server->secmech.aes_cmac);
cifs_free_hash(&server->secmech.hmacsha256);
cifs_free_hash(&server->secmech.md5);
- cifs_free_hash(&server->secmech.sha512);
- cifs_free_hash(&server->secmech.hmacmd5);
- if (server->secmech.enc) {
- crypto_free_aead(server->secmech.enc);
- server->secmech.enc = NULL;
- }
+ if (!SERVER_IS_CHAN(server)) {
+ if (server->secmech.enc) {
+ crypto_free_aead(server->secmech.enc);
+ server->secmech.enc = NULL;
+ }
- if (server->secmech.dec) {
- crypto_free_aead(server->secmech.dec);
+ if (server->secmech.dec) {
+ crypto_free_aead(server->secmech.dec);
+ server->secmech.dec = NULL;
+ }
+ } else {
+ server->secmech.enc = NULL;
server->secmech.dec = NULL;
}
}
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index 15571cf0ba63..315aac5dec05 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -178,10 +178,8 @@ struct session_key {
/* crypto hashing related structure/fields, not specific to a sec mech */
struct cifs_secmech {
- struct shash_desc *hmacmd5; /* hmacmd5 hash function, for NTLMv2/CR1 hashes */
struct shash_desc *md5; /* md5 hash function, for CIFS/SMB1 signatures */
struct shash_desc *hmacsha256; /* hmac-sha256 hash function, for SMB2 signatures */
- struct shash_desc *sha512; /* sha512 hash function, for SMB3.1.1 preauth hash */
struct shash_desc *aes_cmac; /* block-cipher based MAC function, for SMB3 signatures */
struct crypto_aead *enc; /* smb3 encryption AEAD TFM (AES-CCM and AES-GCM) */
diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c
index 3216f786908f..03c0b484a4b5 100644
--- a/fs/smb/client/sess.c
+++ b/fs/smb/client/sess.c
@@ -624,7 +624,7 @@ cifs_ses_add_channel(struct cifs_ses *ses,
* to sign packets before we generate the channel signing key
* (we sign with the session key)
*/
- rc = smb311_crypto_shash_allocate(chan->server);
+ rc = smb3_crypto_shash_allocate(chan->server);
if (rc) {
cifs_dbg(VFS, "%s: crypto alloc failed\n", __func__);
mutex_unlock(&ses->session_mutex);
diff --git a/fs/smb/client/smb2misc.c b/fs/smb/client/smb2misc.c
index f3c4b70b77b9..bdeb12ff53e3 100644
--- a/fs/smb/client/smb2misc.c
+++ b/fs/smb/client/smb2misc.c
@@ -906,41 +906,41 @@ smb311_update_preauth_hash(struct cifs_ses *ses, struct TCP_Server_Info *server,
|| (hdr->Status !=
cpu_to_le32(NT_STATUS_MORE_PROCESSING_REQUIRED))))
return 0;
-
ok:
- rc = smb311_crypto_shash_allocate(server);
- if (rc)
+ rc = cifs_alloc_hash("sha512", &sha512);
+ if (rc) {
+ cifs_dbg(VFS, "%s: Could not allocate SHA512 shash, rc=%d\n", __func__, rc);
return rc;
+ }
- sha512 = server->secmech.sha512;
rc = crypto_shash_init(sha512);
if (rc) {
- cifs_dbg(VFS, "%s: Could not init sha512 shash\n", __func__);
- return rc;
+ cifs_dbg(VFS, "%s: Could not init SHA512 shash, rc=%d\n", __func__, rc);
+ goto err_free;
}
rc = crypto_shash_update(sha512, ses->preauth_sha_hash,
SMB2_PREAUTH_HASH_SIZE);
if (rc) {
- cifs_dbg(VFS, "%s: Could not update sha512 shash\n", __func__);
- return rc;
+ cifs_dbg(VFS, "%s: Could not update SHA512 shash, rc=%d\n", __func__, rc);
+ goto err_free;
}
for (i = 0; i < nvec; i++) {
rc = crypto_shash_update(sha512, iov[i].iov_base, iov[i].iov_len);
if (rc) {
- cifs_dbg(VFS, "%s: Could not update sha512 shash\n",
- __func__);
- return rc;
+ cifs_dbg(VFS, "%s: Could not update SHA512 shash, rc=%d\n", __func__, rc);
+ goto err_free;
}
}
rc = crypto_shash_final(sha512, ses->preauth_sha_hash);
if (rc) {
- cifs_dbg(VFS, "%s: Could not finalize sha512 shash\n",
- __func__);
- return rc;
+ cifs_dbg(VFS, "%s: Could not finalize SHA12 shash, rc=%d\n", __func__, rc);
+ goto err_free;
}
+err_free:
+ cifs_free_hash(&sha512);
return 0;
}
diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
index 1ee2dd4a1cae..177173072bfa 100644
--- a/fs/smb/client/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -4309,7 +4309,7 @@ smb2_get_enc_key(struct TCP_Server_Info *server, __u64 ses_id, int enc, u8 *key)
*/
static int
crypt_message(struct TCP_Server_Info *server, int num_rqst,
- struct smb_rqst *rqst, int enc)
+ struct smb_rqst *rqst, int enc, struct crypto_aead *tfm)
{
struct smb2_transform_hdr *tr_hdr =
(struct smb2_transform_hdr *)rqst[0].rq_iov[0].iov_base;
@@ -4320,8 +4320,6 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst,
u8 key[SMB3_ENC_DEC_KEY_SIZE];
struct aead_request *req;
u8 *iv;
- DECLARE_CRYPTO_WAIT(wait);
- struct crypto_aead *tfm;
unsigned int crypt_len = le32_to_cpu(tr_hdr->OriginalMessageSize);
void *creq;
size_t sensitive_size;
@@ -4333,14 +4331,6 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst,
return rc;
}
- rc = smb3_crypto_aead_allocate(server);
- if (rc) {
- cifs_server_dbg(VFS, "%s: crypto alloc failed\n", __func__);
- return rc;
- }
-
- tfm = enc ? server->secmech.enc : server->secmech.dec;
-
if ((server->cipher_type == SMB2_ENCRYPTION_AES256_CCM) ||
(server->cipher_type == SMB2_ENCRYPTION_AES256_GCM))
rc = crypto_aead_setkey(tfm, key, SMB3_GCM256_CRYPTKEY_SIZE);
@@ -4380,11 +4370,7 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst,
aead_request_set_crypt(req, sg, sg, crypt_len, iv);
aead_request_set_ad(req, assoc_data_len);
- aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
- crypto_req_done, &wait);
-
- rc = crypto_wait_req(enc ? crypto_aead_encrypt(req)
- : crypto_aead_decrypt(req), &wait);
+ rc = enc ? crypto_aead_encrypt(req) : crypto_aead_decrypt(req);
if (!rc && enc)
memcpy(&tr_hdr->Signature, sign, SMB2_SIGNATURE_SIZE);
@@ -4526,7 +4512,7 @@ smb3_init_transform_rq(struct TCP_Server_Info *server, int num_rqst,
/* fill the 1st iov with a transform header */
fill_transform_hdr(tr_hdr, orig_len, old_rq, server->cipher_type);
- rc = crypt_message(server, num_rqst, new_rq, 1);
+ rc = crypt_message(server, num_rqst, new_rq, 1, server->secmech.enc);
cifs_dbg(FYI, "Encrypt message returned %d\n", rc);
if (rc)
goto err_free;
@@ -4551,8 +4537,9 @@ decrypt_raw_data(struct TCP_Server_Info *server, char *buf,
unsigned int buf_data_size, struct iov_iter *iter,
bool is_offloaded)
{
- struct kvec iov[2];
+ struct crypto_aead *tfm;
struct smb_rqst rqst = {NULL};
+ struct kvec iov[2];
size_t iter_size = 0;
int rc;
@@ -4568,9 +4555,31 @@ decrypt_raw_data(struct TCP_Server_Info *server, char *buf,
iter_size = iov_iter_count(iter);
}
- rc = crypt_message(server, 1, &rqst, 0);
+ if (is_offloaded) {
+ if ((server->cipher_type == SMB2_ENCRYPTION_AES128_GCM) ||
+ (server->cipher_type == SMB2_ENCRYPTION_AES256_GCM))
+ tfm = crypto_alloc_aead("gcm(aes)", 0, 0);
+ else
+ tfm = crypto_alloc_aead("ccm(aes)", 0, 0);
+ if (IS_ERR(tfm)) {
+ rc = PTR_ERR(tfm);
+ cifs_server_dbg(VFS, "%s: Failed alloc decrypt TFM, rc=%d\n", __func__, rc);
+
+ return rc;
+ }
+ } else {
+ if (unlikely(!server->secmech.dec))
+ return -EIO;
+
+ tfm = server->secmech.dec;
+ }
+
+ rc = crypt_message(server, 1, &rqst, 0, tfm);
cifs_dbg(FYI, "Decrypt message returned %d\n", rc);
+ if (is_offloaded)
+ crypto_free_aead(tfm);
+
if (rc)
return rc;
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
index bb225758448a..02828b9c3cb3 100644
--- a/fs/smb/client/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -1266,6 +1266,16 @@ SMB2_negotiate(const unsigned int xid,
else
cifs_server_dbg(VFS, "Missing expected negotiate contexts\n");
}
+
+ if (server->cipher_type && !rc) {
+ if (!SERVER_IS_CHAN(server)) {
+ rc = smb3_crypto_aead_allocate(server);
+ } else {
+ /* For channels, just reuse the primary server crypto secmech. */
+ server->secmech.enc = server->primary_server->secmech.enc;
+ server->secmech.dec = server->primary_server->secmech.dec;
+ }
+ }
neg_exit:
free_rsp_buf(resp_buftype, rsp);
return rc;
diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h
index c7e1b149877a..56a896ff7cd9 100644
--- a/fs/smb/client/smb2proto.h
+++ b/fs/smb/client/smb2proto.h
@@ -291,7 +291,7 @@ extern int smb2_validate_and_copy_iov(unsigned int offset,
extern void smb2_copy_fs_info_to_kstatfs(
struct smb2_fs_full_size_info *pfs_inf,
struct kstatfs *kst);
-extern int smb311_crypto_shash_allocate(struct TCP_Server_Info *server);
+extern int smb3_crypto_shash_allocate(struct TCP_Server_Info *server);
extern int smb311_update_preauth_hash(struct cifs_ses *ses,
struct TCP_Server_Info *server,
struct kvec *iov, int nvec);
diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c
index e4636fca821d..f7e04c40d22e 100644
--- a/fs/smb/client/smb2transport.c
+++ b/fs/smb/client/smb2transport.c
@@ -26,8 +26,7 @@
#include "../common/smb2status.h"
#include "smb2glob.h"
-static int
-smb3_crypto_shash_allocate(struct TCP_Server_Info *server)
+int smb3_crypto_shash_allocate(struct TCP_Server_Info *server)
{
struct cifs_secmech *p = &server->secmech;
int rc;
@@ -46,33 +45,6 @@ err:
return rc;
}
-int
-smb311_crypto_shash_allocate(struct TCP_Server_Info *server)
-{
- struct cifs_secmech *p = &server->secmech;
- int rc = 0;
-
- rc = cifs_alloc_hash("hmac(sha256)", &p->hmacsha256);
- if (rc)
- return rc;
-
- rc = cifs_alloc_hash("cmac(aes)", &p->aes_cmac);
- if (rc)
- goto err;
-
- rc = cifs_alloc_hash("sha512", &p->sha512);
- if (rc)
- goto err;
-
- return 0;
-
-err:
- cifs_free_hash(&p->aes_cmac);
- cifs_free_hash(&p->hmacsha256);
- return rc;
-}
-
-
static
int smb2_get_sign_key(__u64 ses_id, struct TCP_Server_Info *server, u8 *key)
{
@@ -242,7 +214,7 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server,
ses = smb2_find_smb_ses(server, le64_to_cpu(shdr->SessionId));
if (unlikely(!ses)) {
- cifs_server_dbg(VFS, "%s: Could not find session\n", __func__);
+ cifs_server_dbg(FYI, "%s: Could not find session\n", __func__);
return -ENOENT;
}
diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h
index c769f9dbc0b4..9f272cc8f566 100644
--- a/fs/smb/common/smb2pdu.h
+++ b/fs/smb/common/smb2pdu.h
@@ -6,7 +6,7 @@
* Note that, due to trying to use names similar to the protocol specifications,
* there are many mixed case field names in the structures below. Although
* this does not match typical Linux kernel style, it is necessary to be
- * able to match against the protocol specfication.
+ * able to match against the protocol specification.
*
* SMB2 commands
* Some commands have minimal (wct=0,bcc=0), or uninteresting, responses
@@ -491,7 +491,7 @@ struct smb2_encryption_neg_context {
__le16 ContextType; /* 2 */
__le16 DataLength;
__le32 Reserved;
- /* CipherCount usally 2, but can be 3 when AES256-GCM enabled */
+ /* CipherCount usually 2, but can be 3 when AES256-GCM enabled */
__le16 CipherCount; /* AES128-GCM and AES128-CCM by default */
__le16 Ciphers[];
} __packed;
@@ -1061,7 +1061,7 @@ struct smb2_server_client_notification {
#define IL_IMPERSONATION cpu_to_le32(0x00000002)
#define IL_DELEGATE cpu_to_le32(0x00000003)
-/* File Attrubutes */
+/* File Attributes */
#define FILE_ATTRIBUTE_READONLY 0x00000001
#define FILE_ATTRIBUTE_HIDDEN 0x00000002
#define FILE_ATTRIBUTE_SYSTEM 0x00000004
diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c
index cac80e7bfefc..aa2a37a7ce84 100644
--- a/fs/smb/server/connection.c
+++ b/fs/smb/server/connection.c
@@ -25,7 +25,7 @@ DECLARE_RWSEM(conn_list_lock);
/**
* ksmbd_conn_free() - free resources of the connection instance
*
- * @conn: connection instance to be cleand up
+ * @conn: connection instance to be cleaned up
*
* During the thread termination, the corresponding conn instance
* resources(sock/memory) are released and finally the conn object is freed.
diff --git a/fs/smb/server/ksmbd_netlink.h b/fs/smb/server/ksmbd_netlink.h
index f4e55199938d..38e6fd2da3b8 100644
--- a/fs/smb/server/ksmbd_netlink.h
+++ b/fs/smb/server/ksmbd_netlink.h
@@ -213,7 +213,7 @@ struct ksmbd_tree_connect_response {
};
/*
- * IPC Request struture to disconnect tree connection.
+ * IPC Request structure to disconnect tree connection.
*/
struct ksmbd_tree_disconnect_request {
__u64 session_id; /* session id */
diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c
index 246cde380dfb..4142c7ad5fa9 100644
--- a/fs/smb/server/oplock.c
+++ b/fs/smb/server/oplock.c
@@ -796,7 +796,7 @@ out:
/**
* smb2_lease_break_noti() - break lease when a new client request
* write lease
- * @opinfo: conains lease state information
+ * @opinfo: contains lease state information
*
* Return: 0 on success, otherwise error
*/
@@ -1484,7 +1484,7 @@ void create_lease_buf(u8 *rbuf, struct lease *lease)
}
/**
- * parse_lease_state() - parse lease context containted in file open request
+ * parse_lease_state() - parse lease context contained in file open request
* @open_req: buffer containing smb2 file open(create) request
*
* Return: allocated lease context object on success, otherwise NULL
diff --git a/fs/smb/server/server.c b/fs/smb/server/server.c
index c402d4abe826..231d2d224656 100644
--- a/fs/smb/server/server.c
+++ b/fs/smb/server/server.c
@@ -279,7 +279,7 @@ static void handle_ksmbd_work(struct work_struct *wk)
/**
* queue_ksmbd_work() - queue a smb request to worker thread queue
- * for proccessing smb command and sending response
+ * for processing smb command and sending response
* @conn: connection instance
*
* read remaining data from socket create and submit work.
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index e6bdc1b20727..7460089c186f 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -1335,8 +1335,7 @@ static int ntlm_negotiate(struct ksmbd_work *work,
return rc;
sz = le16_to_cpu(rsp->SecurityBufferOffset);
- chgblob =
- (struct challenge_message *)((char *)&rsp->hdr.ProtocolId + sz);
+ chgblob = (struct challenge_message *)rsp->Buffer;
memset(chgblob, 0, sizeof(struct challenge_message));
if (!work->conn->use_spnego) {
@@ -1369,9 +1368,7 @@ static int ntlm_negotiate(struct ksmbd_work *work,
goto out;
}
- sz = le16_to_cpu(rsp->SecurityBufferOffset);
- unsafe_memcpy((char *)&rsp->hdr.ProtocolId + sz, spnego_blob, spnego_blob_len,
- /* alloc is larger than blob, see smb2_allocate_rsp_buf() */);
+ memcpy(rsp->Buffer, spnego_blob, spnego_blob_len);
rsp->SecurityBufferLength = cpu_to_le16(spnego_blob_len);
out:
@@ -1453,10 +1450,7 @@ static int ntlm_authenticate(struct ksmbd_work *work,
if (rc)
return -ENOMEM;
- sz = le16_to_cpu(rsp->SecurityBufferOffset);
- unsafe_memcpy((char *)&rsp->hdr.ProtocolId + sz, spnego_blob,
- spnego_blob_len,
- /* alloc is larger than blob, see smb2_allocate_rsp_buf() */);
+ memcpy(rsp->Buffer, spnego_blob, spnego_blob_len);
rsp->SecurityBufferLength = cpu_to_le16(spnego_blob_len);
kfree(spnego_blob);
}
@@ -2058,18 +2052,20 @@ out_err1:
* @access: file access flags
* @disposition: file disposition flags
* @may_flags: set with MAY_ flags
- * @is_dir: is creating open flags for directory
+ * @coptions: file creation options
+ * @mode: file mode
*
* Return: file open flags
*/
static int smb2_create_open_flags(bool file_present, __le32 access,
__le32 disposition,
int *may_flags,
- bool is_dir)
+ __le32 coptions,
+ umode_t mode)
{
int oflags = O_NONBLOCK | O_LARGEFILE;
- if (is_dir) {
+ if (coptions & FILE_DIRECTORY_FILE_LE || S_ISDIR(mode)) {
access &= ~FILE_WRITE_DESIRE_ACCESS_LE;
ksmbd_debug(SMB, "Discard write access to a directory\n");
}
@@ -2086,7 +2082,7 @@ static int smb2_create_open_flags(bool file_present, __le32 access,
*may_flags = MAY_OPEN | MAY_READ;
}
- if (access == FILE_READ_ATTRIBUTES_LE)
+ if (access == FILE_READ_ATTRIBUTES_LE || S_ISBLK(mode) || S_ISCHR(mode))
oflags |= O_PATH;
if (file_present) {
@@ -3181,8 +3177,8 @@ int smb2_open(struct ksmbd_work *work)
open_flags = smb2_create_open_flags(file_present, daccess,
req->CreateDisposition,
&may_flags,
- req->CreateOptions & FILE_DIRECTORY_FILE_LE ||
- (file_present && S_ISDIR(d_inode(path.dentry)->i_mode)));
+ req->CreateOptions,
+ file_present ? d_inode(path.dentry)->i_mode : 0);
if (!test_tree_conn_flag(tcon, KSMBD_TREE_CONN_FLAG_WRITABLE)) {
if (open_flags & (O_CREAT | O_TRUNC)) {
@@ -3531,8 +3527,9 @@ int smb2_open(struct ksmbd_work *work)
memcpy(fp->create_guid, dh_info.CreateGuid,
SMB2_CREATE_GUID_SIZE);
if (dh_info.timeout)
- fp->durable_timeout = min(dh_info.timeout,
- DURABLE_HANDLE_MAX_TIMEOUT);
+ fp->durable_timeout =
+ min_t(unsigned int, dh_info.timeout,
+ DURABLE_HANDLE_MAX_TIMEOUT);
else
fp->durable_timeout = 60;
}
@@ -4586,7 +4583,7 @@ static int smb2_get_ea(struct ksmbd_work *work, struct ksmbd_file *fp,
path = &fp->filp->f_path;
/* single EA entry is requested with given user.* name */
if (req->InputBufferLength) {
- if (le32_to_cpu(req->InputBufferLength) <
+ if (le32_to_cpu(req->InputBufferLength) <=
sizeof(struct smb2_ea_info_req))
return -EINVAL;
@@ -8090,7 +8087,7 @@ int smb2_ioctl(struct ksmbd_work *work)
goto out;
}
- if (in_buf_len < sizeof(struct copychunk_ioctl_req)) {
+ if (in_buf_len <= sizeof(struct copychunk_ioctl_req)) {
ret = -EINVAL;
goto out;
}
diff --git a/fs/smb/server/smb2pdu.h b/fs/smb/server/smb2pdu.h
index 3be7d5ae65a8..73aff20e22d0 100644
--- a/fs/smb/server/smb2pdu.h
+++ b/fs/smb/server/smb2pdu.h
@@ -194,7 +194,7 @@ struct copychunk_ioctl_req {
__le64 ResumeKey[3];
__le32 ChunkCount;
__le32 Reserved;
- __u8 Chunks[1]; /* array of srv_copychunk */
+ __u8 Chunks[]; /* array of srv_copychunk */
} __packed;
struct srv_copychunk {
@@ -370,7 +370,7 @@ struct smb2_file_attr_tag_info {
struct smb2_ea_info_req {
__le32 NextEntryOffset;
__u8 EaNameLength;
- char name[1];
+ char name[];
} __packed; /* level 15 Query */
struct smb2_ea_info {
diff --git a/fs/smb/server/smb_common.c b/fs/smb/server/smb_common.c
index cc4bb2377cbd..5b8d75e78ffb 100644
--- a/fs/smb/server/smb_common.c
+++ b/fs/smb/server/smb_common.c
@@ -488,7 +488,7 @@ int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work, int info_level,
* @shortname: destination short filename
*
* Return: shortname length or 0 when source long name is '.' or '..'
- * TODO: Though this function comforms the restriction of 8.3 Filename spec,
+ * TODO: Though this function conforms the restriction of 8.3 Filename spec,
* but the result is different with Windows 7's one. need to check.
*/
int ksmbd_extract_shortname(struct ksmbd_conn *conn, const char *longname,
diff --git a/fs/smb/server/vfs_cache.h b/fs/smb/server/vfs_cache.h
index b0f6d0f94cb8..5bbb179736c2 100644
--- a/fs/smb/server/vfs_cache.h
+++ b/fs/smb/server/vfs_cache.h
@@ -100,8 +100,8 @@ struct ksmbd_file {
struct list_head blocked_works;
struct list_head lock_list;
- int durable_timeout;
- int durable_scavenger_timeout;
+ unsigned int durable_timeout;
+ unsigned int durable_scavenger_timeout;
/* if ls is happening on directory, below is valid*/
struct ksmbd_readdir_data readdir_data;
diff --git a/fs/smb/server/xattr.h b/fs/smb/server/xattr.h
index fa3e27d6971b..505101a8104c 100644
--- a/fs/smb/server/xattr.h
+++ b/fs/smb/server/xattr.h
@@ -99,7 +99,7 @@ struct xattr_ntacl {
__u8 posix_acl_hash[XATTR_SD_HASH_SIZE]; /* 64bytes hash for posix acl */
};
-/* DOS ATTRIBUITE XATTR PREFIX */
+/* DOS ATTRIBUTE XATTR PREFIX */
#define DOS_ATTRIBUTE_PREFIX "DOSATTRIB"
#define DOS_ATTRIBUTE_PREFIX_LEN (sizeof(DOS_ATTRIBUTE_PREFIX) - 1)
#define XATTR_NAME_DOS_ATTRIBUTE (XATTR_USER_PREFIX DOS_ATTRIBUTE_PREFIX)
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index d91cec93d968..5cc69beaa62e 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -2807,7 +2807,6 @@ static const struct file_operations dfs_fops = {
.read = dfs_file_read,
.write = dfs_file_write,
.owner = THIS_MODULE,
- .llseek = no_llseek,
};
/**
@@ -2952,7 +2951,6 @@ static const struct file_operations dfs_global_fops = {
.read = dfs_global_file_read,
.write = dfs_global_file_write,
.owner = THIS_MODULE,
- .llseek = no_llseek,
};
/**
diff --git a/include/linux/einj-cxl.h b/include/cxl/einj.h
index 624ff6ff41f9..624ff6ff41f9 100644
--- a/include/linux/einj-cxl.h
+++ b/include/cxl/einj.h
diff --git a/include/linux/cxl-event.h b/include/cxl/event.h
index 0bea1afbd747..0bea1afbd747 100644
--- a/include/linux/cxl-event.h
+++ b/include/cxl/event.h
diff --git a/include/cxl/mailbox.h b/include/cxl/mailbox.h
new file mode 100644
index 000000000000..bacd111e75f1
--- /dev/null
+++ b/include/cxl/mailbox.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2024 Intel Corporation. */
+#ifndef __CXL_MBOX_H__
+#define __CXL_MBOX_H__
+#include <linux/rcuwait.h>
+
+struct cxl_mbox_cmd;
+
+/**
+ * struct cxl_mailbox - context for CXL mailbox operations
+ * @host: device that hosts the mailbox
+ * @payload_size: Size of space for payload
+ * (CXL 3.1 8.2.8.4.3 Mailbox Capabilities Register)
+ * @mbox_mutex: mutex protects device mailbox and firmware
+ * @mbox_wait: rcuwait for mailbox
+ * @mbox_send: @dev specific transport for transmitting mailbox commands
+ */
+struct cxl_mailbox {
+ struct device *host;
+ size_t payload_size;
+ struct mutex mbox_mutex; /* lock to protect mailbox context */
+ struct rcuwait mbox_wait;
+ int (*mbox_send)(struct cxl_mailbox *cxl_mbox, struct cxl_mbox_cmd *cmd);
+};
+
+int cxl_mailbox_init(struct cxl_mailbox *cxl_mbox, struct device *host);
+
+#endif
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 1655c4c23a78..4d5ee84c468b 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -363,6 +363,7 @@ void acpi_unregister_gsi (u32 gsi);
struct pci_dev;
+struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin);
int acpi_pci_irq_enable (struct pci_dev *dev);
void acpi_penalize_isa_irq(int irq, int active);
bool acpi_isa_irq_available(int irq);
diff --git a/include/linux/attribute_container.h b/include/linux/attribute_container.h
index e4004d1e6725..b3643de9931d 100644
--- a/include/linux/attribute_container.h
+++ b/include/linux/attribute_container.h
@@ -61,14 +61,8 @@ int attribute_container_device_trigger_safe(struct device *dev,
int (*undo)(struct attribute_container *,
struct device *,
struct device *));
-void attribute_container_trigger(struct device *dev,
- int (*fn)(struct attribute_container *,
- struct device *));
int attribute_container_add_attrs(struct device *classdev);
int attribute_container_add_class_device(struct device *classdev);
-int attribute_container_add_class_device_adapter(struct attribute_container *cont,
- struct device *dev,
- struct device *classdev);
void attribute_container_remove_attrs(struct device *classdev);
void attribute_container_class_device_del(struct device *classdev);
struct attribute_container *attribute_container_classdev_to_container(struct device *);
diff --git a/include/linux/auxiliary_bus.h b/include/linux/auxiliary_bus.h
index 662b8ae54b6a..31762324bcc9 100644
--- a/include/linux/auxiliary_bus.h
+++ b/include/linux/auxiliary_bus.h
@@ -271,6 +271,6 @@ void auxiliary_driver_unregister(struct auxiliary_driver *auxdrv);
struct auxiliary_device *auxiliary_find_device(struct device *start,
const void *data,
- int (*match)(struct device *dev, const void *data));
+ device_match_t match);
#endif /* _AUXILIARY_BUS_H_ */
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index d3b66d77df7a..262b6596eca5 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -203,12 +203,12 @@ unsigned long bitmap_find_next_zero_area_off(unsigned long *map,
* the bit offset of all zero areas this function finds is multiples of that
* power of 2. A @align_mask of 0 means no alignment is required.
*/
-static inline unsigned long
-bitmap_find_next_zero_area(unsigned long *map,
- unsigned long size,
- unsigned long start,
- unsigned int nr,
- unsigned long align_mask)
+static __always_inline
+unsigned long bitmap_find_next_zero_area(unsigned long *map,
+ unsigned long size,
+ unsigned long start,
+ unsigned int nr,
+ unsigned long align_mask)
{
return bitmap_find_next_zero_area_off(map, size, start, nr,
align_mask, 0);
@@ -228,7 +228,7 @@ void bitmap_fold(unsigned long *dst, const unsigned long *orig,
#define bitmap_size(nbits) (ALIGN(nbits, BITS_PER_LONG) / BITS_PER_BYTE)
-static inline void bitmap_zero(unsigned long *dst, unsigned int nbits)
+static __always_inline void bitmap_zero(unsigned long *dst, unsigned int nbits)
{
unsigned int len = bitmap_size(nbits);
@@ -238,7 +238,7 @@ static inline void bitmap_zero(unsigned long *dst, unsigned int nbits)
memset(dst, 0, len);
}
-static inline void bitmap_fill(unsigned long *dst, unsigned int nbits)
+static __always_inline void bitmap_fill(unsigned long *dst, unsigned int nbits)
{
unsigned int len = bitmap_size(nbits);
@@ -248,8 +248,8 @@ static inline void bitmap_fill(unsigned long *dst, unsigned int nbits)
memset(dst, 0xff, len);
}
-static inline void bitmap_copy(unsigned long *dst, const unsigned long *src,
- unsigned int nbits)
+static __always_inline
+void bitmap_copy(unsigned long *dst, const unsigned long *src, unsigned int nbits)
{
unsigned int len = bitmap_size(nbits);
@@ -262,8 +262,8 @@ static inline void bitmap_copy(unsigned long *dst, const unsigned long *src,
/*
* Copy bitmap and clear tail bits in last word.
*/
-static inline void bitmap_copy_clear_tail(unsigned long *dst,
- const unsigned long *src, unsigned int nbits)
+static __always_inline
+void bitmap_copy_clear_tail(unsigned long *dst, const unsigned long *src, unsigned int nbits)
{
bitmap_copy(dst, src, nbits);
if (nbits % BITS_PER_LONG)
@@ -318,16 +318,18 @@ void bitmap_to_arr64(u64 *buf, const unsigned long *bitmap, unsigned int nbits);
bitmap_copy_clear_tail((unsigned long *)(buf), (const unsigned long *)(bitmap), (nbits))
#endif
-static inline bool bitmap_and(unsigned long *dst, const unsigned long *src1,
- const unsigned long *src2, unsigned int nbits)
+static __always_inline
+bool bitmap_and(unsigned long *dst, const unsigned long *src1,
+ const unsigned long *src2, unsigned int nbits)
{
if (small_const_nbits(nbits))
return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0;
return __bitmap_and(dst, src1, src2, nbits);
}
-static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
- const unsigned long *src2, unsigned int nbits)
+static __always_inline
+void bitmap_or(unsigned long *dst, const unsigned long *src1,
+ const unsigned long *src2, unsigned int nbits)
{
if (small_const_nbits(nbits))
*dst = *src1 | *src2;
@@ -335,8 +337,9 @@ static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
__bitmap_or(dst, src1, src2, nbits);
}
-static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1,
- const unsigned long *src2, unsigned int nbits)
+static __always_inline
+void bitmap_xor(unsigned long *dst, const unsigned long *src1,
+ const unsigned long *src2, unsigned int nbits)
{
if (small_const_nbits(nbits))
*dst = *src1 ^ *src2;
@@ -344,16 +347,17 @@ static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1,
__bitmap_xor(dst, src1, src2, nbits);
}
-static inline bool bitmap_andnot(unsigned long *dst, const unsigned long *src1,
- const unsigned long *src2, unsigned int nbits)
+static __always_inline
+bool bitmap_andnot(unsigned long *dst, const unsigned long *src1,
+ const unsigned long *src2, unsigned int nbits)
{
if (small_const_nbits(nbits))
return (*dst = *src1 & ~(*src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0;
return __bitmap_andnot(dst, src1, src2, nbits);
}
-static inline void bitmap_complement(unsigned long *dst, const unsigned long *src,
- unsigned int nbits)
+static __always_inline
+void bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned int nbits)
{
if (small_const_nbits(nbits))
*dst = ~(*src);
@@ -368,8 +372,8 @@ static inline void bitmap_complement(unsigned long *dst, const unsigned long *sr
#endif
#define BITMAP_MEM_MASK (BITMAP_MEM_ALIGNMENT - 1)
-static inline bool bitmap_equal(const unsigned long *src1,
- const unsigned long *src2, unsigned int nbits)
+static __always_inline
+bool bitmap_equal(const unsigned long *src1, const unsigned long *src2, unsigned int nbits)
{
if (small_const_nbits(nbits))
return !((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits));
@@ -388,10 +392,9 @@ static inline bool bitmap_equal(const unsigned long *src1,
*
* Returns: True if (*@src1 | *@src2) == *@src3, false otherwise
*/
-static inline bool bitmap_or_equal(const unsigned long *src1,
- const unsigned long *src2,
- const unsigned long *src3,
- unsigned int nbits)
+static __always_inline
+bool bitmap_or_equal(const unsigned long *src1, const unsigned long *src2,
+ const unsigned long *src3, unsigned int nbits)
{
if (!small_const_nbits(nbits))
return __bitmap_or_equal(src1, src2, src3, nbits);
@@ -399,9 +402,8 @@ static inline bool bitmap_or_equal(const unsigned long *src1,
return !(((*src1 | *src2) ^ *src3) & BITMAP_LAST_WORD_MASK(nbits));
}
-static inline bool bitmap_intersects(const unsigned long *src1,
- const unsigned long *src2,
- unsigned int nbits)
+static __always_inline
+bool bitmap_intersects(const unsigned long *src1, const unsigned long *src2, unsigned int nbits)
{
if (small_const_nbits(nbits))
return ((*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0;
@@ -409,8 +411,8 @@ static inline bool bitmap_intersects(const unsigned long *src1,
return __bitmap_intersects(src1, src2, nbits);
}
-static inline bool bitmap_subset(const unsigned long *src1,
- const unsigned long *src2, unsigned int nbits)
+static __always_inline
+bool bitmap_subset(const unsigned long *src1, const unsigned long *src2, unsigned int nbits)
{
if (small_const_nbits(nbits))
return ! ((*src1 & ~(*src2)) & BITMAP_LAST_WORD_MASK(nbits));
@@ -418,7 +420,8 @@ static inline bool bitmap_subset(const unsigned long *src1,
return __bitmap_subset(src1, src2, nbits);
}
-static inline bool bitmap_empty(const unsigned long *src, unsigned nbits)
+static __always_inline
+bool bitmap_empty(const unsigned long *src, unsigned nbits)
{
if (small_const_nbits(nbits))
return ! (*src & BITMAP_LAST_WORD_MASK(nbits));
@@ -426,7 +429,8 @@ static inline bool bitmap_empty(const unsigned long *src, unsigned nbits)
return find_first_bit(src, nbits) == nbits;
}
-static inline bool bitmap_full(const unsigned long *src, unsigned int nbits)
+static __always_inline
+bool bitmap_full(const unsigned long *src, unsigned int nbits)
{
if (small_const_nbits(nbits))
return ! (~(*src) & BITMAP_LAST_WORD_MASK(nbits));
@@ -460,8 +464,8 @@ unsigned long bitmap_weight_andnot(const unsigned long *src1,
return __bitmap_weight_andnot(src1, src2, nbits);
}
-static __always_inline void bitmap_set(unsigned long *map, unsigned int start,
- unsigned int nbits)
+static __always_inline
+void bitmap_set(unsigned long *map, unsigned int start, unsigned int nbits)
{
if (__builtin_constant_p(nbits) && nbits == 1)
__set_bit(start, map);
@@ -476,8 +480,8 @@ static __always_inline void bitmap_set(unsigned long *map, unsigned int start,
__bitmap_set(map, start, nbits);
}
-static __always_inline void bitmap_clear(unsigned long *map, unsigned int start,
- unsigned int nbits)
+static __always_inline
+void bitmap_clear(unsigned long *map, unsigned int start, unsigned int nbits)
{
if (__builtin_constant_p(nbits) && nbits == 1)
__clear_bit(start, map);
@@ -492,8 +496,9 @@ static __always_inline void bitmap_clear(unsigned long *map, unsigned int start,
__bitmap_clear(map, start, nbits);
}
-static inline void bitmap_shift_right(unsigned long *dst, const unsigned long *src,
- unsigned int shift, unsigned int nbits)
+static __always_inline
+void bitmap_shift_right(unsigned long *dst, const unsigned long *src,
+ unsigned int shift, unsigned int nbits)
{
if (small_const_nbits(nbits))
*dst = (*src & BITMAP_LAST_WORD_MASK(nbits)) >> shift;
@@ -501,8 +506,9 @@ static inline void bitmap_shift_right(unsigned long *dst, const unsigned long *s
__bitmap_shift_right(dst, src, shift, nbits);
}
-static inline void bitmap_shift_left(unsigned long *dst, const unsigned long *src,
- unsigned int shift, unsigned int nbits)
+static __always_inline
+void bitmap_shift_left(unsigned long *dst, const unsigned long *src,
+ unsigned int shift, unsigned int nbits)
{
if (small_const_nbits(nbits))
*dst = (*src << shift) & BITMAP_LAST_WORD_MASK(nbits);
@@ -510,11 +516,12 @@ static inline void bitmap_shift_left(unsigned long *dst, const unsigned long *sr
__bitmap_shift_left(dst, src, shift, nbits);
}
-static inline void bitmap_replace(unsigned long *dst,
- const unsigned long *old,
- const unsigned long *new,
- const unsigned long *mask,
- unsigned int nbits)
+static __always_inline
+void bitmap_replace(unsigned long *dst,
+ const unsigned long *old,
+ const unsigned long *new,
+ const unsigned long *mask,
+ unsigned int nbits)
{
if (small_const_nbits(nbits))
*dst = (*old & ~(*mask)) | (*new & *mask);
@@ -557,8 +564,9 @@ static inline void bitmap_replace(unsigned long *dst,
* bitmap_gather() can be seen as the 'reverse' bitmap_scatter() operation.
* See bitmap_scatter() for details related to this relationship.
*/
-static inline void bitmap_scatter(unsigned long *dst, const unsigned long *src,
- const unsigned long *mask, unsigned int nbits)
+static __always_inline
+void bitmap_scatter(unsigned long *dst, const unsigned long *src,
+ const unsigned long *mask, unsigned int nbits)
{
unsigned int n = 0;
unsigned int bit;
@@ -611,8 +619,9 @@ static inline void bitmap_scatter(unsigned long *dst, const unsigned long *src,
* bitmap_scatter(res, src, mask, n) and a call to
* bitmap_scatter(res, result, mask, n) will lead to the same res value.
*/
-static inline void bitmap_gather(unsigned long *dst, const unsigned long *src,
- const unsigned long *mask, unsigned int nbits)
+static __always_inline
+void bitmap_gather(unsigned long *dst, const unsigned long *src,
+ const unsigned long *mask, unsigned int nbits)
{
unsigned int n = 0;
unsigned int bit;
@@ -623,9 +632,9 @@ static inline void bitmap_gather(unsigned long *dst, const unsigned long *src,
__assign_bit(n++, dst, test_bit(bit, src));
}
-static inline void bitmap_next_set_region(unsigned long *bitmap,
- unsigned int *rs, unsigned int *re,
- unsigned int end)
+static __always_inline
+void bitmap_next_set_region(unsigned long *bitmap, unsigned int *rs,
+ unsigned int *re, unsigned int end)
{
*rs = find_next_bit(bitmap, end, *rs);
*re = find_next_zero_bit(bitmap, end, *rs + 1);
@@ -640,7 +649,8 @@ static inline void bitmap_next_set_region(unsigned long *bitmap,
* This is the complement to __bitmap_find_free_region() and releases
* the found region (by clearing it in the bitmap).
*/
-static inline void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order)
+static __always_inline
+void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order)
{
bitmap_clear(bitmap, pos, BIT(order));
}
@@ -656,7 +666,8 @@ static inline void bitmap_release_region(unsigned long *bitmap, unsigned int pos
* Returns: 0 on success, or %-EBUSY if specified region wasn't
* free (not all bits were zero).
*/
-static inline int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order)
+static __always_inline
+int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order)
{
unsigned int len = BIT(order);
@@ -680,7 +691,8 @@ static inline int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos
* Returns: the bit offset in bitmap of the allocated region,
* or -errno on failure.
*/
-static inline int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order)
+static __always_inline
+int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order)
{
unsigned int pos, end; /* scans bitmap by regions of size order */
@@ -734,7 +746,7 @@ static inline int bitmap_find_free_region(unsigned long *bitmap, unsigned int bi
* That is ``(u32 *)(&val)[0]`` gets the upper 32 bits,
* but we expect the lower 32-bits of u64.
*/
-static inline void bitmap_from_u64(unsigned long *dst, u64 mask)
+static __always_inline void bitmap_from_u64(unsigned long *dst, u64 mask)
{
bitmap_from_arr64(dst, &mask, 64);
}
@@ -749,9 +761,8 @@ static inline void bitmap_from_u64(unsigned long *dst, u64 mask)
* @map memory region. For @nbits = 0 and @nbits > BITS_PER_LONG the return
* value is undefined.
*/
-static inline unsigned long bitmap_read(const unsigned long *map,
- unsigned long start,
- unsigned long nbits)
+static __always_inline
+unsigned long bitmap_read(const unsigned long *map, unsigned long start, unsigned long nbits)
{
size_t index = BIT_WORD(start);
unsigned long offset = start % BITS_PER_LONG;
@@ -784,8 +795,9 @@ static inline unsigned long bitmap_read(const unsigned long *map,
*
* For @nbits == 0 and @nbits > BITS_PER_LONG no writes are performed.
*/
-static inline void bitmap_write(unsigned long *map, unsigned long value,
- unsigned long start, unsigned long nbits)
+static __always_inline
+void bitmap_write(unsigned long *map, unsigned long value,
+ unsigned long start, unsigned long nbits)
{
size_t index;
unsigned long offset;
diff --git a/include/linux/bits.h b/include/linux/bits.h
index 0eb24d21aac2..60044b608817 100644
--- a/include/linux/bits.h
+++ b/include/linux/bits.h
@@ -36,4 +36,19 @@
#define GENMASK_ULL(h, l) \
(GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l))
+#if !defined(__ASSEMBLY__)
+/*
+ * Missing asm support
+ *
+ * __GENMASK_U128() depends on _BIT128() which would not work
+ * in the asm code, as it shifts an 'unsigned __init128' data
+ * type instead of direct representation of 128 bit constants
+ * such as long and unsigned long. The fundamental problem is
+ * that a 128 bit constant will get silently truncated by the
+ * gcc compiler.
+ */
+#define GENMASK_U128(h, l) \
+ (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U128(h, l))
+#endif
+
#endif /* __LINUX_BITS_H */
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index f66f6aac74f6..d7941478158c 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -449,8 +449,6 @@ extern int ceph_osdc_init(struct ceph_osd_client *osdc,
extern void ceph_osdc_stop(struct ceph_osd_client *osdc);
extern void ceph_osdc_reopen_osds(struct ceph_osd_client *osdc);
-extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
- struct ceph_msg *msg);
extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
struct ceph_msg *msg);
void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb);
diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h
index a3d3e888cf1f..038b2d523bf8 100644
--- a/include/linux/cleanup.h
+++ b/include/linux/cleanup.h
@@ -4,6 +4,142 @@
#include <linux/compiler.h>
+/**
+ * DOC: scope-based cleanup helpers
+ *
+ * The "goto error" pattern is notorious for introducing subtle resource
+ * leaks. It is tedious and error prone to add new resource acquisition
+ * constraints into code paths that already have several unwind
+ * conditions. The "cleanup" helpers enable the compiler to help with
+ * this tedium and can aid in maintaining LIFO (last in first out)
+ * unwind ordering to avoid unintentional leaks.
+ *
+ * As drivers make up the majority of the kernel code base, here is an
+ * example of using these helpers to clean up PCI drivers. The target of
+ * the cleanups are occasions where a goto is used to unwind a device
+ * reference (pci_dev_put()), or unlock the device (pci_dev_unlock())
+ * before returning.
+ *
+ * The DEFINE_FREE() macro can arrange for PCI device references to be
+ * dropped when the associated variable goes out of scope::
+ *
+ * DEFINE_FREE(pci_dev_put, struct pci_dev *, if (_T) pci_dev_put(_T))
+ * ...
+ * struct pci_dev *dev __free(pci_dev_put) =
+ * pci_get_slot(parent, PCI_DEVFN(0, 0));
+ *
+ * The above will automatically call pci_dev_put() if @dev is non-NULL
+ * when @dev goes out of scope (automatic variable scope). If a function
+ * wants to invoke pci_dev_put() on error, but return @dev (i.e. without
+ * freeing it) on success, it can do::
+ *
+ * return no_free_ptr(dev);
+ *
+ * ...or::
+ *
+ * return_ptr(dev);
+ *
+ * The DEFINE_GUARD() macro can arrange for the PCI device lock to be
+ * dropped when the scope where guard() is invoked ends::
+ *
+ * DEFINE_GUARD(pci_dev, struct pci_dev *, pci_dev_lock(_T), pci_dev_unlock(_T))
+ * ...
+ * guard(pci_dev)(dev);
+ *
+ * The lifetime of the lock obtained by the guard() helper follows the
+ * scope of automatic variable declaration. Take the following example::
+ *
+ * func(...)
+ * {
+ * if (...) {
+ * ...
+ * guard(pci_dev)(dev); // pci_dev_lock() invoked here
+ * ...
+ * } // <- implied pci_dev_unlock() triggered here
+ * }
+ *
+ * Observe the lock is held for the remainder of the "if ()" block not
+ * the remainder of "func()".
+ *
+ * Now, when a function uses both __free() and guard(), or multiple
+ * instances of __free(), the LIFO order of variable definition order
+ * matters. GCC documentation says:
+ *
+ * "When multiple variables in the same scope have cleanup attributes,
+ * at exit from the scope their associated cleanup functions are run in
+ * reverse order of definition (last defined, first cleanup)."
+ *
+ * When the unwind order matters it requires that variables be defined
+ * mid-function scope rather than at the top of the file. Take the
+ * following example and notice the bug highlighted by "!!"::
+ *
+ * LIST_HEAD(list);
+ * DEFINE_MUTEX(lock);
+ *
+ * struct object {
+ * struct list_head node;
+ * };
+ *
+ * static struct object *alloc_add(void)
+ * {
+ * struct object *obj;
+ *
+ * lockdep_assert_held(&lock);
+ * obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+ * if (obj) {
+ * LIST_HEAD_INIT(&obj->node);
+ * list_add(obj->node, &list):
+ * }
+ * return obj;
+ * }
+ *
+ * static void remove_free(struct object *obj)
+ * {
+ * lockdep_assert_held(&lock);
+ * list_del(&obj->node);
+ * kfree(obj);
+ * }
+ *
+ * DEFINE_FREE(remove_free, struct object *, if (_T) remove_free(_T))
+ * static int init(void)
+ * {
+ * struct object *obj __free(remove_free) = NULL;
+ * int err;
+ *
+ * guard(mutex)(&lock);
+ * obj = alloc_add();
+ *
+ * if (!obj)
+ * return -ENOMEM;
+ *
+ * err = other_init(obj);
+ * if (err)
+ * return err; // remove_free() called without the lock!!
+ *
+ * no_free_ptr(obj);
+ * return 0;
+ * }
+ *
+ * That bug is fixed by changing init() to call guard() and define +
+ * initialize @obj in this order::
+ *
+ * guard(mutex)(&lock);
+ * struct object *obj __free(remove_free) = alloc_add();
+ *
+ * Given that the "__free(...) = NULL" pattern for variables defined at
+ * the top of the function poses this potential interdependency problem
+ * the recommendation is to always define and assign variables in one
+ * statement and not group variable definitions at the top of the
+ * function when __free() is used.
+ *
+ * Lastly, given that the benefit of cleanup helpers is removal of
+ * "goto", and that the "goto" statement can jump between scopes, the
+ * expectation is that usage of "goto" and cleanup helpers is never
+ * mixed in the same function. I.e. for a given routine, convert all
+ * resources that need a "goto" cleanup to scope-based cleanup, or
+ * convert none of them.
+ */
+
/*
* DEFINE_FREE(name, type, free):
* simple helper macro that defines the required wrapper for a __free()
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index ec55bcce4146..4d4e23b6e3e7 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -133,7 +133,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
#define annotate_unreachable() __annotate_unreachable(__COUNTER__)
/* Annotate a C jump table to allow objtool to follow the code flow */
-#define __annotate_jump_table __section(".rodata..c_jump_table")
+#define __annotate_jump_table __section(".rodata..c_jump_table,\"a\",@progbits #")
#else /* !CONFIG_OBJTOOL */
#define annotate_reachable()
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 53158de44b83..9278a50d514f 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -30,7 +30,7 @@
extern unsigned int nr_cpu_ids;
#endif
-static inline void set_nr_cpu_ids(unsigned int nr)
+static __always_inline void set_nr_cpu_ids(unsigned int nr)
{
#if (NR_CPUS == 1) || defined(CONFIG_FORCE_NR_CPUS)
WARN_ON(nr != nr_cpu_ids);
@@ -149,7 +149,7 @@ static __always_inline unsigned int cpumask_check(unsigned int cpu)
*
* Return: >= nr_cpu_ids if no cpus set.
*/
-static inline unsigned int cpumask_first(const struct cpumask *srcp)
+static __always_inline unsigned int cpumask_first(const struct cpumask *srcp)
{
return find_first_bit(cpumask_bits(srcp), small_cpumask_bits);
}
@@ -160,7 +160,7 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp)
*
* Return: >= nr_cpu_ids if all cpus are set.
*/
-static inline unsigned int cpumask_first_zero(const struct cpumask *srcp)
+static __always_inline unsigned int cpumask_first_zero(const struct cpumask *srcp)
{
return find_first_zero_bit(cpumask_bits(srcp), small_cpumask_bits);
}
@@ -172,7 +172,7 @@ static inline unsigned int cpumask_first_zero(const struct cpumask *srcp)
*
* Return: >= nr_cpu_ids if no cpus set in both. See also cpumask_next_and().
*/
-static inline
+static __always_inline
unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask *srcp2)
{
return find_first_and_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits);
@@ -186,7 +186,7 @@ unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask
*
* Return: >= nr_cpu_ids if no cpus set in all.
*/
-static inline
+static __always_inline
unsigned int cpumask_first_and_and(const struct cpumask *srcp1,
const struct cpumask *srcp2,
const struct cpumask *srcp3)
@@ -201,7 +201,7 @@ unsigned int cpumask_first_and_and(const struct cpumask *srcp1,
*
* Return: >= nr_cpumask_bits if no CPUs set.
*/
-static inline unsigned int cpumask_last(const struct cpumask *srcp)
+static __always_inline unsigned int cpumask_last(const struct cpumask *srcp)
{
return find_last_bit(cpumask_bits(srcp), small_cpumask_bits);
}
@@ -213,7 +213,7 @@ static inline unsigned int cpumask_last(const struct cpumask *srcp)
*
* Return: >= nr_cpu_ids if no further cpus set.
*/
-static inline
+static __always_inline
unsigned int cpumask_next(int n, const struct cpumask *srcp)
{
/* -1 is a legal arg here. */
@@ -229,7 +229,8 @@ unsigned int cpumask_next(int n, const struct cpumask *srcp)
*
* Return: >= nr_cpu_ids if no further cpus unset.
*/
-static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp)
+static __always_inline
+unsigned int cpumask_next_zero(int n, const struct cpumask *srcp)
{
/* -1 is a legal arg here. */
if (n != -1)
@@ -239,18 +240,21 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp)
#if NR_CPUS == 1
/* Uniprocessor: there is only one valid CPU */
-static inline unsigned int cpumask_local_spread(unsigned int i, int node)
+static __always_inline
+unsigned int cpumask_local_spread(unsigned int i, int node)
{
return 0;
}
-static inline unsigned int cpumask_any_and_distribute(const struct cpumask *src1p,
- const struct cpumask *src2p)
+static __always_inline
+unsigned int cpumask_any_and_distribute(const struct cpumask *src1p,
+ const struct cpumask *src2p)
{
return cpumask_first_and(src1p, src2p);
}
-static inline unsigned int cpumask_any_distribute(const struct cpumask *srcp)
+static __always_inline
+unsigned int cpumask_any_distribute(const struct cpumask *srcp)
{
return cpumask_first(srcp);
}
@@ -269,9 +273,9 @@ unsigned int cpumask_any_distribute(const struct cpumask *srcp);
*
* Return: >= nr_cpu_ids if no further cpus set in both.
*/
-static inline
+static __always_inline
unsigned int cpumask_next_and(int n, const struct cpumask *src1p,
- const struct cpumask *src2p)
+ const struct cpumask *src2p)
{
/* -1 is a legal arg here. */
if (n != -1)
@@ -291,7 +295,7 @@ unsigned int cpumask_next_and(int n, const struct cpumask *src1p,
for_each_set_bit(cpu, cpumask_bits(mask), small_cpumask_bits)
#if NR_CPUS == 1
-static inline
+static __always_inline
unsigned int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap)
{
cpumask_check(start);
@@ -394,7 +398,7 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta
* Often used to find any cpu but smp_processor_id() in a mask.
* Return: >= nr_cpu_ids if no cpus set.
*/
-static inline
+static __always_inline
unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu)
{
unsigned int i;
@@ -414,7 +418,7 @@ unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu)
*
* Returns >= nr_cpu_ids if no cpus set.
*/
-static inline
+static __always_inline
unsigned int cpumask_any_and_but(const struct cpumask *mask1,
const struct cpumask *mask2,
unsigned int cpu)
@@ -436,7 +440,8 @@ unsigned int cpumask_any_and_but(const struct cpumask *mask1,
*
* Return: >= nr_cpu_ids if such cpu doesn't exist.
*/
-static inline unsigned int cpumask_nth(unsigned int cpu, const struct cpumask *srcp)
+static __always_inline
+unsigned int cpumask_nth(unsigned int cpu, const struct cpumask *srcp)
{
return find_nth_bit(cpumask_bits(srcp), small_cpumask_bits, cpumask_check(cpu));
}
@@ -449,7 +454,7 @@ static inline unsigned int cpumask_nth(unsigned int cpu, const struct cpumask *s
*
* Return: >= nr_cpu_ids if such cpu doesn't exist.
*/
-static inline
+static __always_inline
unsigned int cpumask_nth_and(unsigned int cpu, const struct cpumask *srcp1,
const struct cpumask *srcp2)
{
@@ -465,7 +470,7 @@ unsigned int cpumask_nth_and(unsigned int cpu, const struct cpumask *srcp1,
*
* Return: >= nr_cpu_ids if such cpu doesn't exist.
*/
-static inline
+static __always_inline
unsigned int cpumask_nth_andnot(unsigned int cpu, const struct cpumask *srcp1,
const struct cpumask *srcp2)
{
@@ -508,12 +513,14 @@ unsigned int cpumask_nth_and_andnot(unsigned int cpu, const struct cpumask *srcp
* @cpu: cpu number (< nr_cpu_ids)
* @dstp: the cpumask pointer
*/
-static __always_inline void cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp)
+static __always_inline
+void cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp)
{
set_bit(cpumask_check(cpu), cpumask_bits(dstp));
}
-static __always_inline void __cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp)
+static __always_inline
+void __cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp)
{
__set_bit(cpumask_check(cpu), cpumask_bits(dstp));
}
@@ -557,7 +564,8 @@ static __always_inline void __cpumask_assign_cpu(int cpu, struct cpumask *dstp,
*
* Return: true if @cpu is set in @cpumask, else returns false
*/
-static __always_inline bool cpumask_test_cpu(int cpu, const struct cpumask *cpumask)
+static __always_inline
+bool cpumask_test_cpu(int cpu, const struct cpumask *cpumask)
{
return test_bit(cpumask_check(cpu), cpumask_bits((cpumask)));
}
@@ -571,7 +579,8 @@ static __always_inline bool cpumask_test_cpu(int cpu, const struct cpumask *cpum
*
* Return: true if @cpu is set in old bitmap of @cpumask, else returns false
*/
-static __always_inline bool cpumask_test_and_set_cpu(int cpu, struct cpumask *cpumask)
+static __always_inline
+bool cpumask_test_and_set_cpu(int cpu, struct cpumask *cpumask)
{
return test_and_set_bit(cpumask_check(cpu), cpumask_bits(cpumask));
}
@@ -585,7 +594,8 @@ static __always_inline bool cpumask_test_and_set_cpu(int cpu, struct cpumask *cp
*
* Return: true if @cpu is set in old bitmap of @cpumask, else returns false
*/
-static __always_inline bool cpumask_test_and_clear_cpu(int cpu, struct cpumask *cpumask)
+static __always_inline
+bool cpumask_test_and_clear_cpu(int cpu, struct cpumask *cpumask)
{
return test_and_clear_bit(cpumask_check(cpu), cpumask_bits(cpumask));
}
@@ -594,7 +604,7 @@ static __always_inline bool cpumask_test_and_clear_cpu(int cpu, struct cpumask *
* cpumask_setall - set all cpus (< nr_cpu_ids) in a cpumask
* @dstp: the cpumask pointer
*/
-static inline void cpumask_setall(struct cpumask *dstp)
+static __always_inline void cpumask_setall(struct cpumask *dstp)
{
if (small_const_nbits(small_cpumask_bits)) {
cpumask_bits(dstp)[0] = BITMAP_LAST_WORD_MASK(nr_cpumask_bits);
@@ -607,7 +617,7 @@ static inline void cpumask_setall(struct cpumask *dstp)
* cpumask_clear - clear all cpus (< nr_cpu_ids) in a cpumask
* @dstp: the cpumask pointer
*/
-static inline void cpumask_clear(struct cpumask *dstp)
+static __always_inline void cpumask_clear(struct cpumask *dstp)
{
bitmap_zero(cpumask_bits(dstp), large_cpumask_bits);
}
@@ -620,9 +630,9 @@ static inline void cpumask_clear(struct cpumask *dstp)
*
* Return: false if *@dstp is empty, else returns true
*/
-static inline bool cpumask_and(struct cpumask *dstp,
- const struct cpumask *src1p,
- const struct cpumask *src2p)
+static __always_inline
+bool cpumask_and(struct cpumask *dstp, const struct cpumask *src1p,
+ const struct cpumask *src2p)
{
return bitmap_and(cpumask_bits(dstp), cpumask_bits(src1p),
cpumask_bits(src2p), small_cpumask_bits);
@@ -634,8 +644,9 @@ static inline bool cpumask_and(struct cpumask *dstp,
* @src1p: the first input
* @src2p: the second input
*/
-static inline void cpumask_or(struct cpumask *dstp, const struct cpumask *src1p,
- const struct cpumask *src2p)
+static __always_inline
+void cpumask_or(struct cpumask *dstp, const struct cpumask *src1p,
+ const struct cpumask *src2p)
{
bitmap_or(cpumask_bits(dstp), cpumask_bits(src1p),
cpumask_bits(src2p), small_cpumask_bits);
@@ -647,9 +658,9 @@ static inline void cpumask_or(struct cpumask *dstp, const struct cpumask *src1p,
* @src1p: the first input
* @src2p: the second input
*/
-static inline void cpumask_xor(struct cpumask *dstp,
- const struct cpumask *src1p,
- const struct cpumask *src2p)
+static __always_inline
+void cpumask_xor(struct cpumask *dstp, const struct cpumask *src1p,
+ const struct cpumask *src2p)
{
bitmap_xor(cpumask_bits(dstp), cpumask_bits(src1p),
cpumask_bits(src2p), small_cpumask_bits);
@@ -663,9 +674,9 @@ static inline void cpumask_xor(struct cpumask *dstp,
*
* Return: false if *@dstp is empty, else returns true
*/
-static inline bool cpumask_andnot(struct cpumask *dstp,
- const struct cpumask *src1p,
- const struct cpumask *src2p)
+static __always_inline
+bool cpumask_andnot(struct cpumask *dstp, const struct cpumask *src1p,
+ const struct cpumask *src2p)
{
return bitmap_andnot(cpumask_bits(dstp), cpumask_bits(src1p),
cpumask_bits(src2p), small_cpumask_bits);
@@ -678,8 +689,8 @@ static inline bool cpumask_andnot(struct cpumask *dstp,
*
* Return: true if the cpumasks are equal, false if not
*/
-static inline bool cpumask_equal(const struct cpumask *src1p,
- const struct cpumask *src2p)
+static __always_inline
+bool cpumask_equal(const struct cpumask *src1p, const struct cpumask *src2p)
{
return bitmap_equal(cpumask_bits(src1p), cpumask_bits(src2p),
small_cpumask_bits);
@@ -694,9 +705,9 @@ static inline bool cpumask_equal(const struct cpumask *src1p,
* Return: true if first cpumask ORed with second cpumask == third cpumask,
* otherwise false
*/
-static inline bool cpumask_or_equal(const struct cpumask *src1p,
- const struct cpumask *src2p,
- const struct cpumask *src3p)
+static __always_inline
+bool cpumask_or_equal(const struct cpumask *src1p, const struct cpumask *src2p,
+ const struct cpumask *src3p)
{
return bitmap_or_equal(cpumask_bits(src1p), cpumask_bits(src2p),
cpumask_bits(src3p), small_cpumask_bits);
@@ -710,8 +721,8 @@ static inline bool cpumask_or_equal(const struct cpumask *src1p,
* Return: true if first cpumask ANDed with second cpumask is non-empty,
* otherwise false
*/
-static inline bool cpumask_intersects(const struct cpumask *src1p,
- const struct cpumask *src2p)
+static __always_inline
+bool cpumask_intersects(const struct cpumask *src1p, const struct cpumask *src2p)
{
return bitmap_intersects(cpumask_bits(src1p), cpumask_bits(src2p),
small_cpumask_bits);
@@ -724,8 +735,8 @@ static inline bool cpumask_intersects(const struct cpumask *src1p,
*
* Return: true if *@src1p is a subset of *@src2p, else returns false
*/
-static inline bool cpumask_subset(const struct cpumask *src1p,
- const struct cpumask *src2p)
+static __always_inline
+bool cpumask_subset(const struct cpumask *src1p, const struct cpumask *src2p)
{
return bitmap_subset(cpumask_bits(src1p), cpumask_bits(src2p),
small_cpumask_bits);
@@ -737,7 +748,7 @@ static inline bool cpumask_subset(const struct cpumask *src1p,
*
* Return: true if srcp is empty (has no bits set), else false
*/
-static inline bool cpumask_empty(const struct cpumask *srcp)
+static __always_inline bool cpumask_empty(const struct cpumask *srcp)
{
return bitmap_empty(cpumask_bits(srcp), small_cpumask_bits);
}
@@ -748,7 +759,7 @@ static inline bool cpumask_empty(const struct cpumask *srcp)
*
* Return: true if srcp is full (has all bits set), else false
*/
-static inline bool cpumask_full(const struct cpumask *srcp)
+static __always_inline bool cpumask_full(const struct cpumask *srcp)
{
return bitmap_full(cpumask_bits(srcp), nr_cpumask_bits);
}
@@ -759,7 +770,7 @@ static inline bool cpumask_full(const struct cpumask *srcp)
*
* Return: count of bits set in *srcp
*/
-static inline unsigned int cpumask_weight(const struct cpumask *srcp)
+static __always_inline unsigned int cpumask_weight(const struct cpumask *srcp)
{
return bitmap_weight(cpumask_bits(srcp), small_cpumask_bits);
}
@@ -771,8 +782,8 @@ static inline unsigned int cpumask_weight(const struct cpumask *srcp)
*
* Return: count of bits set in both *srcp1 and *srcp2
*/
-static inline unsigned int cpumask_weight_and(const struct cpumask *srcp1,
- const struct cpumask *srcp2)
+static __always_inline
+unsigned int cpumask_weight_and(const struct cpumask *srcp1, const struct cpumask *srcp2)
{
return bitmap_weight_and(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits);
}
@@ -784,8 +795,9 @@ static inline unsigned int cpumask_weight_and(const struct cpumask *srcp1,
*
* Return: count of bits set in both *srcp1 and *srcp2
*/
-static inline unsigned int cpumask_weight_andnot(const struct cpumask *srcp1,
- const struct cpumask *srcp2)
+static __always_inline
+unsigned int cpumask_weight_andnot(const struct cpumask *srcp1,
+ const struct cpumask *srcp2)
{
return bitmap_weight_andnot(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits);
}
@@ -796,8 +808,8 @@ static inline unsigned int cpumask_weight_andnot(const struct cpumask *srcp1,
* @srcp: the input to shift
* @n: the number of bits to shift by
*/
-static inline void cpumask_shift_right(struct cpumask *dstp,
- const struct cpumask *srcp, int n)
+static __always_inline
+void cpumask_shift_right(struct cpumask *dstp, const struct cpumask *srcp, int n)
{
bitmap_shift_right(cpumask_bits(dstp), cpumask_bits(srcp), n,
small_cpumask_bits);
@@ -809,8 +821,8 @@ static inline void cpumask_shift_right(struct cpumask *dstp,
* @srcp: the input to shift
* @n: the number of bits to shift by
*/
-static inline void cpumask_shift_left(struct cpumask *dstp,
- const struct cpumask *srcp, int n)
+static __always_inline
+void cpumask_shift_left(struct cpumask *dstp, const struct cpumask *srcp, int n)
{
bitmap_shift_left(cpumask_bits(dstp), cpumask_bits(srcp), n,
nr_cpumask_bits);
@@ -821,8 +833,8 @@ static inline void cpumask_shift_left(struct cpumask *dstp,
* @dstp: the result
* @srcp: the input cpumask
*/
-static inline void cpumask_copy(struct cpumask *dstp,
- const struct cpumask *srcp)
+static __always_inline
+void cpumask_copy(struct cpumask *dstp, const struct cpumask *srcp)
{
bitmap_copy(cpumask_bits(dstp), cpumask_bits(srcp), large_cpumask_bits);
}
@@ -858,8 +870,8 @@ static inline void cpumask_copy(struct cpumask *dstp,
*
* Return: -errno, or 0 for success.
*/
-static inline int cpumask_parse_user(const char __user *buf, int len,
- struct cpumask *dstp)
+static __always_inline
+int cpumask_parse_user(const char __user *buf, int len, struct cpumask *dstp)
{
return bitmap_parse_user(buf, len, cpumask_bits(dstp), nr_cpumask_bits);
}
@@ -872,8 +884,8 @@ static inline int cpumask_parse_user(const char __user *buf, int len,
*
* Return: -errno, or 0 for success.
*/
-static inline int cpumask_parselist_user(const char __user *buf, int len,
- struct cpumask *dstp)
+static __always_inline
+int cpumask_parselist_user(const char __user *buf, int len, struct cpumask *dstp)
{
return bitmap_parselist_user(buf, len, cpumask_bits(dstp),
nr_cpumask_bits);
@@ -886,7 +898,7 @@ static inline int cpumask_parselist_user(const char __user *buf, int len,
*
* Return: -errno, or 0 for success.
*/
-static inline int cpumask_parse(const char *buf, struct cpumask *dstp)
+static __always_inline int cpumask_parse(const char *buf, struct cpumask *dstp)
{
return bitmap_parse(buf, UINT_MAX, cpumask_bits(dstp), nr_cpumask_bits);
}
@@ -898,7 +910,7 @@ static inline int cpumask_parse(const char *buf, struct cpumask *dstp)
*
* Return: -errno, or 0 for success.
*/
-static inline int cpulist_parse(const char *buf, struct cpumask *dstp)
+static __always_inline int cpulist_parse(const char *buf, struct cpumask *dstp)
{
return bitmap_parselist(buf, cpumask_bits(dstp), nr_cpumask_bits);
}
@@ -908,7 +920,7 @@ static inline int cpulist_parse(const char *buf, struct cpumask *dstp)
*
* Return: size to allocate for a &struct cpumask in bytes
*/
-static inline unsigned int cpumask_size(void)
+static __always_inline unsigned int cpumask_size(void)
{
return bitmap_size(large_cpumask_bits);
}
@@ -920,7 +932,7 @@ static inline unsigned int cpumask_size(void)
bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node);
-static inline
+static __always_inline
bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node)
{
return alloc_cpumask_var_node(mask, flags | __GFP_ZERO, node);
@@ -938,13 +950,13 @@ bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node)
*
* Return: %true if allocation succeeded, %false if not
*/
-static inline
+static __always_inline
bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
{
return alloc_cpumask_var_node(mask, flags, NUMA_NO_NODE);
}
-static inline
+static __always_inline
bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
{
return alloc_cpumask_var(mask, flags | __GFP_ZERO);
@@ -954,7 +966,7 @@ void alloc_bootmem_cpumask_var(cpumask_var_t *mask);
void free_cpumask_var(cpumask_var_t mask);
void free_bootmem_cpumask_var(cpumask_var_t mask);
-static inline bool cpumask_available(cpumask_var_t mask)
+static __always_inline bool cpumask_available(cpumask_var_t mask)
{
return mask != NULL;
}
@@ -964,43 +976,43 @@ static inline bool cpumask_available(cpumask_var_t mask)
#define this_cpu_cpumask_var_ptr(x) this_cpu_ptr(x)
#define __cpumask_var_read_mostly
-static inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
+static __always_inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
{
return true;
}
-static inline bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags,
+static __always_inline bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags,
int node)
{
return true;
}
-static inline bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
+static __always_inline bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
{
cpumask_clear(*mask);
return true;
}
-static inline bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags,
+static __always_inline bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags,
int node)
{
cpumask_clear(*mask);
return true;
}
-static inline void alloc_bootmem_cpumask_var(cpumask_var_t *mask)
+static __always_inline void alloc_bootmem_cpumask_var(cpumask_var_t *mask)
{
}
-static inline void free_cpumask_var(cpumask_var_t mask)
+static __always_inline void free_cpumask_var(cpumask_var_t mask)
{
}
-static inline void free_bootmem_cpumask_var(cpumask_var_t mask)
+static __always_inline void free_bootmem_cpumask_var(cpumask_var_t mask)
{
}
-static inline bool cpumask_available(cpumask_var_t mask)
+static __always_inline bool cpumask_available(cpumask_var_t mask)
{
return true;
}
@@ -1058,7 +1070,7 @@ void set_cpu_online(unsigned int cpu, bool online);
((struct cpumask *)(1 ? (bitmap) \
: (void *)sizeof(__check_is_bitmap(bitmap))))
-static inline int __check_is_bitmap(const unsigned long *bitmap)
+static __always_inline int __check_is_bitmap(const unsigned long *bitmap)
{
return 1;
}
@@ -1073,7 +1085,7 @@ static inline int __check_is_bitmap(const unsigned long *bitmap)
extern const unsigned long
cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)];
-static inline const struct cpumask *get_cpu_mask(unsigned int cpu)
+static __always_inline const struct cpumask *get_cpu_mask(unsigned int cpu)
{
const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG];
p -= cpu / BITS_PER_LONG;
@@ -1100,32 +1112,32 @@ static __always_inline unsigned int num_online_cpus(void)
#define num_present_cpus() cpumask_weight(cpu_present_mask)
#define num_active_cpus() cpumask_weight(cpu_active_mask)
-static inline bool cpu_online(unsigned int cpu)
+static __always_inline bool cpu_online(unsigned int cpu)
{
return cpumask_test_cpu(cpu, cpu_online_mask);
}
-static inline bool cpu_enabled(unsigned int cpu)
+static __always_inline bool cpu_enabled(unsigned int cpu)
{
return cpumask_test_cpu(cpu, cpu_enabled_mask);
}
-static inline bool cpu_possible(unsigned int cpu)
+static __always_inline bool cpu_possible(unsigned int cpu)
{
return cpumask_test_cpu(cpu, cpu_possible_mask);
}
-static inline bool cpu_present(unsigned int cpu)
+static __always_inline bool cpu_present(unsigned int cpu)
{
return cpumask_test_cpu(cpu, cpu_present_mask);
}
-static inline bool cpu_active(unsigned int cpu)
+static __always_inline bool cpu_active(unsigned int cpu)
{
return cpumask_test_cpu(cpu, cpu_active_mask);
}
-static inline bool cpu_dying(unsigned int cpu)
+static __always_inline bool cpu_dying(unsigned int cpu)
{
return cpumask_test_cpu(cpu, cpu_dying_mask);
}
@@ -1138,32 +1150,32 @@ static inline bool cpu_dying(unsigned int cpu)
#define num_present_cpus() 1U
#define num_active_cpus() 1U
-static inline bool cpu_online(unsigned int cpu)
+static __always_inline bool cpu_online(unsigned int cpu)
{
return cpu == 0;
}
-static inline bool cpu_possible(unsigned int cpu)
+static __always_inline bool cpu_possible(unsigned int cpu)
{
return cpu == 0;
}
-static inline bool cpu_enabled(unsigned int cpu)
+static __always_inline bool cpu_enabled(unsigned int cpu)
{
return cpu == 0;
}
-static inline bool cpu_present(unsigned int cpu)
+static __always_inline bool cpu_present(unsigned int cpu)
{
return cpu == 0;
}
-static inline bool cpu_active(unsigned int cpu)
+static __always_inline bool cpu_active(unsigned int cpu)
{
return cpu == 0;
}
-static inline bool cpu_dying(unsigned int cpu)
+static __always_inline bool cpu_dying(unsigned int cpu)
{
return false;
}
@@ -1197,7 +1209,7 @@ static inline bool cpu_dying(unsigned int cpu)
* Return: the length of the (null-terminated) @buf string, zero if
* nothing is copied.
*/
-static inline ssize_t
+static __always_inline ssize_t
cpumap_print_to_pagebuf(bool list, char *buf, const struct cpumask *mask)
{
return bitmap_print_to_pagebuf(list, buf, cpumask_bits(mask),
@@ -1220,9 +1232,9 @@ cpumap_print_to_pagebuf(bool list, char *buf, const struct cpumask *mask)
* Return: the length of how many bytes have been copied, excluding
* terminating '\0'.
*/
-static inline ssize_t
-cpumap_print_bitmask_to_buf(char *buf, const struct cpumask *mask,
- loff_t off, size_t count)
+static __always_inline
+ssize_t cpumap_print_bitmask_to_buf(char *buf, const struct cpumask *mask,
+ loff_t off, size_t count)
{
return bitmap_print_bitmask_to_buf(buf, cpumask_bits(mask),
nr_cpu_ids, off, count) - 1;
@@ -1242,9 +1254,9 @@ cpumap_print_bitmask_to_buf(char *buf, const struct cpumask *mask,
* Return: the length of how many bytes have been copied, excluding
* terminating '\0'.
*/
-static inline ssize_t
-cpumap_print_list_to_buf(char *buf, const struct cpumask *mask,
- loff_t off, size_t count)
+static __always_inline
+ssize_t cpumap_print_list_to_buf(char *buf, const struct cpumask *mask,
+ loff_t off, size_t count)
{
return bitmap_print_list_to_buf(buf, cpumask_bits(mask),
nr_cpu_ids, off, count) - 1;
diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index c9c65b132c0f..0928a6c8ae1e 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -57,7 +57,6 @@ static const struct file_operations __fops = { \
.release = simple_attr_release, \
.read = debugfs_attr_read, \
.write = (__is_signed) ? debugfs_attr_write_signed : debugfs_attr_write, \
- .llseek = no_llseek, \
}
#define DEFINE_DEBUGFS_ATTRIBUTE(__fops, __get, __set, __fmt) \
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 53ca3a913d06..8321f65897f3 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -524,7 +524,6 @@ int dm_post_suspending(struct dm_target *ti);
int dm_noflush_suspending(struct dm_target *ti);
void dm_accept_partial_bio(struct bio *bio, unsigned int n_sectors);
void dm_submit_bio_remap(struct bio *clone, struct bio *tgt_clone);
-union map_info *dm_get_rq_mapinfo(struct request *rq);
#ifdef CONFIG_BLK_DEV_ZONED
struct dm_report_zones_args {
diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h
index 807831d6bf0f..cdc4757217f9 100644
--- a/include/linux/device/bus.h
+++ b/include/linux/device/bus.h
@@ -126,6 +126,9 @@ struct bus_attribute {
int __must_check bus_create_file(const struct bus_type *bus, struct bus_attribute *attr);
void bus_remove_file(const struct bus_type *bus, struct bus_attribute *attr);
+/* Matching function type for drivers/base APIs to find a specific device */
+typedef int (*device_match_t)(struct device *dev, const void *data);
+
/* Generic device matching functions that all busses can use to match with */
int device_match_name(struct device *dev, const void *name);
int device_match_of_node(struct device *dev, const void *np);
@@ -139,8 +142,7 @@ int device_match_any(struct device *dev, const void *unused);
int bus_for_each_dev(const struct bus_type *bus, struct device *start, void *data,
int (*fn)(struct device *dev, void *data));
struct device *bus_find_device(const struct bus_type *bus, struct device *start,
- const void *data,
- int (*match)(struct device *dev, const void *data));
+ const void *data, device_match_t match);
/**
* bus_find_device_by_name - device iterator for locating a particular device
* of a specific name.
diff --git a/include/linux/device/class.h b/include/linux/device/class.h
index c576b49c55c2..518c9c83d64b 100644
--- a/include/linux/device/class.h
+++ b/include/linux/device/class.h
@@ -95,7 +95,7 @@ void class_dev_iter_exit(struct class_dev_iter *iter);
int class_for_each_device(const struct class *class, const struct device *start, void *data,
int (*fn)(struct device *dev, void *data));
struct device *class_find_device(const struct class *class, const struct device *start,
- const void *data, int (*match)(struct device *, const void *));
+ const void *data, device_match_t match);
/**
* class_find_device_by_name - device iterator for locating a particular device
diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h
index 1fc8b68786de..5c04b8e3833b 100644
--- a/include/linux/device/driver.h
+++ b/include/linux/device/driver.h
@@ -157,7 +157,7 @@ int __must_check driver_for_each_device(struct device_driver *drv, struct device
void *data, int (*fn)(struct device *dev, void *));
struct device *driver_find_device(const struct device_driver *drv,
struct device *start, const void *data,
- int (*match)(struct device *dev, const void *data));
+ device_match_t match);
/**
* driver_find_device_by_name - device iterator for locating a particular device
diff --git a/include/linux/find.h b/include/linux/find.h
index 5dfca4225fef..68685714bc18 100644
--- a/include/linux/find.h
+++ b/include/linux/find.h
@@ -52,7 +52,7 @@ unsigned long _find_next_bit_le(const unsigned long *addr, unsigned
* Returns the bit number for the next set bit
* If no bits are set, returns @size.
*/
-static inline
+static __always_inline
unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
unsigned long offset)
{
@@ -81,7 +81,7 @@ unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
* Returns the bit number for the next set bit
* If no bits are set, returns @size.
*/
-static inline
+static __always_inline
unsigned long find_next_and_bit(const unsigned long *addr1,
const unsigned long *addr2, unsigned long size,
unsigned long offset)
@@ -112,7 +112,7 @@ unsigned long find_next_and_bit(const unsigned long *addr1,
* Returns the bit number for the next set bit
* If no bits are set, returns @size.
*/
-static inline
+static __always_inline
unsigned long find_next_andnot_bit(const unsigned long *addr1,
const unsigned long *addr2, unsigned long size,
unsigned long offset)
@@ -142,7 +142,7 @@ unsigned long find_next_andnot_bit(const unsigned long *addr1,
* Returns the bit number for the next set bit
* If no bits are set, returns @size.
*/
-static inline
+static __always_inline
unsigned long find_next_or_bit(const unsigned long *addr1,
const unsigned long *addr2, unsigned long size,
unsigned long offset)
@@ -171,7 +171,7 @@ unsigned long find_next_or_bit(const unsigned long *addr1,
* Returns the bit number of the next zero bit
* If no bits are zero, returns @size.
*/
-static inline
+static __always_inline
unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
unsigned long offset)
{
@@ -198,7 +198,7 @@ unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
* Returns the bit number of the first set bit.
* If no bits are set, returns @size.
*/
-static inline
+static __always_inline
unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
{
if (small_const_nbits(size)) {
@@ -224,7 +224,7 @@ unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
* Returns the bit number of the N'th set bit.
* If no such, returns >= @size.
*/
-static inline
+static __always_inline
unsigned long find_nth_bit(const unsigned long *addr, unsigned long size, unsigned long n)
{
if (n >= size)
@@ -249,7 +249,7 @@ unsigned long find_nth_bit(const unsigned long *addr, unsigned long size, unsign
* Returns the bit number of the N'th set bit.
* If no such, returns @size.
*/
-static inline
+static __always_inline
unsigned long find_nth_and_bit(const unsigned long *addr1, const unsigned long *addr2,
unsigned long size, unsigned long n)
{
@@ -276,7 +276,7 @@ unsigned long find_nth_and_bit(const unsigned long *addr1, const unsigned long *
* Returns the bit number of the N'th set bit.
* If no such, returns @size.
*/
-static inline
+static __always_inline
unsigned long find_nth_andnot_bit(const unsigned long *addr1, const unsigned long *addr2,
unsigned long size, unsigned long n)
{
@@ -332,7 +332,7 @@ unsigned long find_nth_and_andnot_bit(const unsigned long *addr1,
* Returns the bit number for the next set bit
* If no bits are set, returns @size.
*/
-static inline
+static __always_inline
unsigned long find_first_and_bit(const unsigned long *addr1,
const unsigned long *addr2,
unsigned long size)
@@ -357,7 +357,7 @@ unsigned long find_first_and_bit(const unsigned long *addr1,
* Returns the bit number for the first set bit
* If no bits are set, returns @size.
*/
-static inline
+static __always_inline
unsigned long find_first_and_and_bit(const unsigned long *addr1,
const unsigned long *addr2,
const unsigned long *addr3,
@@ -381,7 +381,7 @@ unsigned long find_first_and_and_bit(const unsigned long *addr1,
* Returns the bit number of the first cleared bit.
* If no bits are zero, returns @size.
*/
-static inline
+static __always_inline
unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
{
if (small_const_nbits(size)) {
@@ -402,7 +402,7 @@ unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
*
* Returns the bit number of the last set bit, or size.
*/
-static inline
+static __always_inline
unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
{
if (small_const_nbits(size)) {
@@ -425,7 +425,7 @@ unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
* Returns the bit number for the next set bit, or first set bit up to @offset
* If no bits are set, returns @size.
*/
-static inline
+static __always_inline
unsigned long find_next_and_bit_wrap(const unsigned long *addr1,
const unsigned long *addr2,
unsigned long size, unsigned long offset)
@@ -448,7 +448,7 @@ unsigned long find_next_and_bit_wrap(const unsigned long *addr1,
* Returns the bit number for the next set bit, or first set bit up to @offset
* If no bits are set, returns @size.
*/
-static inline
+static __always_inline
unsigned long find_next_bit_wrap(const unsigned long *addr,
unsigned long size, unsigned long offset)
{
@@ -465,7 +465,7 @@ unsigned long find_next_bit_wrap(const unsigned long *addr,
* Helper for for_each_set_bit_wrap(). Make sure you're doing right thing
* before using it alone.
*/
-static inline
+static __always_inline
unsigned long __for_each_wrap(const unsigned long *bitmap, unsigned long size,
unsigned long start, unsigned long n)
{
@@ -506,20 +506,20 @@ extern unsigned long find_next_clump8(unsigned long *clump,
#if defined(__LITTLE_ENDIAN)
-static inline unsigned long find_next_zero_bit_le(const void *addr,
- unsigned long size, unsigned long offset)
+static __always_inline
+unsigned long find_next_zero_bit_le(const void *addr, unsigned long size, unsigned long offset)
{
return find_next_zero_bit(addr, size, offset);
}
-static inline unsigned long find_next_bit_le(const void *addr,
- unsigned long size, unsigned long offset)
+static __always_inline
+unsigned long find_next_bit_le(const void *addr, unsigned long size, unsigned long offset)
{
return find_next_bit(addr, size, offset);
}
-static inline unsigned long find_first_zero_bit_le(const void *addr,
- unsigned long size)
+static __always_inline
+unsigned long find_first_zero_bit_le(const void *addr, unsigned long size)
{
return find_first_zero_bit(addr, size);
}
@@ -527,7 +527,7 @@ static inline unsigned long find_first_zero_bit_le(const void *addr,
#elif defined(__BIG_ENDIAN)
#ifndef find_next_zero_bit_le
-static inline
+static __always_inline
unsigned long find_next_zero_bit_le(const void *addr, unsigned
long size, unsigned long offset)
{
@@ -546,7 +546,7 @@ unsigned long find_next_zero_bit_le(const void *addr, unsigned
#endif
#ifndef find_first_zero_bit_le
-static inline
+static __always_inline
unsigned long find_first_zero_bit_le(const void *addr, unsigned long size)
{
if (small_const_nbits(size)) {
@@ -560,7 +560,7 @@ unsigned long find_first_zero_bit_le(const void *addr, unsigned long size)
#endif
#ifndef find_next_bit_le
-static inline
+static __always_inline
unsigned long find_next_bit_le(const void *addr, unsigned
long size, unsigned long offset)
{
diff --git a/include/linux/fs.h b/include/linux/fs.h
index eae5b67e4a15..e3c603d01337 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3234,7 +3234,6 @@ extern ssize_t iter_file_splice_write(struct pipe_inode_info *,
extern void
file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
extern loff_t noop_llseek(struct file *file, loff_t offset, int whence);
-#define no_llseek NULL
extern loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize);
extern loff_t generic_file_llseek(struct file *file, loff_t offset, int whence);
extern loff_t generic_file_llseek_size(struct file *file, loff_t offset,
diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h
index 083c860fd28e..c90ec889bfc2 100644
--- a/include/linux/fsl/mc.h
+++ b/include/linux/fsl/mc.h
@@ -436,7 +436,7 @@ void fsl_mc_free_irqs(struct fsl_mc_device *mc_dev);
struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev,
u16 if_id);
-extern struct bus_type fsl_mc_bus_type;
+extern const struct bus_type fsl_mc_bus_type;
extern struct device_type fsl_mc_bus_dprc_type;
extern struct device_type fsl_mc_bus_dpni_type;
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 98c47c394b89..e4697539b665 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -692,6 +692,9 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
nodemask_t *nmask, gfp_t gfp_mask,
bool allow_alloc_fallback);
+struct folio *alloc_hugetlb_folio_reserve(struct hstate *h, int preferred_nid,
+ nodemask_t *nmask, gfp_t gfp_mask);
+
int hugetlb_add_to_page_cache(struct folio *folio, struct address_space *mapping,
pgoff_t idx);
void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
@@ -1060,6 +1063,13 @@ static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
}
static inline struct folio *
+alloc_hugetlb_folio_reserve(struct hstate *h, int preferred_nid,
+ nodemask_t *nmask, gfp_t gfp_mask)
+{
+ return NULL;
+}
+
+static inline struct folio *
alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
nodemask_t *nmask, gfp_t gfp_mask,
bool allow_alloc_fallback)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 0d5125a3e31a..db567d26f7b9 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1529,8 +1529,22 @@ static inline void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) {}
#endif
#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING
-int kvm_arch_hardware_enable(void);
-void kvm_arch_hardware_disable(void);
+/*
+ * kvm_arch_{enable,disable}_virtualization() are called on one CPU, under
+ * kvm_usage_lock, immediately after/before 0=>1 and 1=>0 transitions of
+ * kvm_usage_count, i.e. at the beginning of the generic hardware enabling
+ * sequence, and at the end of the generic hardware disabling sequence.
+ */
+void kvm_arch_enable_virtualization(void);
+void kvm_arch_disable_virtualization(void);
+/*
+ * kvm_arch_{enable,disable}_virtualization_cpu() are called on "every" CPU to
+ * do the actual twiddling of hardware bits. The hooks are called on all
+ * online CPUs when KVM enables/disabled virtualization, and on a single CPU
+ * when that CPU is onlined/offlined (including for Resume/Suspend).
+ */
+int kvm_arch_enable_virtualization_cpu(void);
+void kvm_arch_disable_virtualization_cpu(void);
#endif
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu);
diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index b61438313a73..9fd7a0ce9c1a 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -107,11 +107,11 @@ extern nodemask_t _unused_nodemask_arg_;
*/
#define nodemask_pr_args(maskp) __nodemask_pr_numnodes(maskp), \
__nodemask_pr_bits(maskp)
-static inline unsigned int __nodemask_pr_numnodes(const nodemask_t *m)
+static __always_inline unsigned int __nodemask_pr_numnodes(const nodemask_t *m)
{
return m ? MAX_NUMNODES : 0;
}
-static inline const unsigned long *__nodemask_pr_bits(const nodemask_t *m)
+static __always_inline const unsigned long *__nodemask_pr_bits(const nodemask_t *m)
{
return m ? m->bits : NULL;
}
@@ -132,19 +132,19 @@ static __always_inline void __node_set(int node, volatile nodemask_t *dstp)
}
#define node_clear(node, dst) __node_clear((node), &(dst))
-static inline void __node_clear(int node, volatile nodemask_t *dstp)
+static __always_inline void __node_clear(int node, volatile nodemask_t *dstp)
{
clear_bit(node, dstp->bits);
}
#define nodes_setall(dst) __nodes_setall(&(dst), MAX_NUMNODES)
-static inline void __nodes_setall(nodemask_t *dstp, unsigned int nbits)
+static __always_inline void __nodes_setall(nodemask_t *dstp, unsigned int nbits)
{
bitmap_fill(dstp->bits, nbits);
}
#define nodes_clear(dst) __nodes_clear(&(dst), MAX_NUMNODES)
-static inline void __nodes_clear(nodemask_t *dstp, unsigned int nbits)
+static __always_inline void __nodes_clear(nodemask_t *dstp, unsigned int nbits)
{
bitmap_zero(dstp->bits, nbits);
}
@@ -154,14 +154,14 @@ static inline void __nodes_clear(nodemask_t *dstp, unsigned int nbits)
#define node_test_and_set(node, nodemask) \
__node_test_and_set((node), &(nodemask))
-static inline bool __node_test_and_set(int node, nodemask_t *addr)
+static __always_inline bool __node_test_and_set(int node, nodemask_t *addr)
{
return test_and_set_bit(node, addr->bits);
}
#define nodes_and(dst, src1, src2) \
__nodes_and(&(dst), &(src1), &(src2), MAX_NUMNODES)
-static inline void __nodes_and(nodemask_t *dstp, const nodemask_t *src1p,
+static __always_inline void __nodes_and(nodemask_t *dstp, const nodemask_t *src1p,
const nodemask_t *src2p, unsigned int nbits)
{
bitmap_and(dstp->bits, src1p->bits, src2p->bits, nbits);
@@ -169,7 +169,7 @@ static inline void __nodes_and(nodemask_t *dstp, const nodemask_t *src1p,
#define nodes_or(dst, src1, src2) \
__nodes_or(&(dst), &(src1), &(src2), MAX_NUMNODES)
-static inline void __nodes_or(nodemask_t *dstp, const nodemask_t *src1p,
+static __always_inline void __nodes_or(nodemask_t *dstp, const nodemask_t *src1p,
const nodemask_t *src2p, unsigned int nbits)
{
bitmap_or(dstp->bits, src1p->bits, src2p->bits, nbits);
@@ -177,7 +177,7 @@ static inline void __nodes_or(nodemask_t *dstp, const nodemask_t *src1p,
#define nodes_xor(dst, src1, src2) \
__nodes_xor(&(dst), &(src1), &(src2), MAX_NUMNODES)
-static inline void __nodes_xor(nodemask_t *dstp, const nodemask_t *src1p,
+static __always_inline void __nodes_xor(nodemask_t *dstp, const nodemask_t *src1p,
const nodemask_t *src2p, unsigned int nbits)
{
bitmap_xor(dstp->bits, src1p->bits, src2p->bits, nbits);
@@ -185,7 +185,7 @@ static inline void __nodes_xor(nodemask_t *dstp, const nodemask_t *src1p,
#define nodes_andnot(dst, src1, src2) \
__nodes_andnot(&(dst), &(src1), &(src2), MAX_NUMNODES)
-static inline void __nodes_andnot(nodemask_t *dstp, const nodemask_t *src1p,
+static __always_inline void __nodes_andnot(nodemask_t *dstp, const nodemask_t *src1p,
const nodemask_t *src2p, unsigned int nbits)
{
bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits);
@@ -193,7 +193,7 @@ static inline void __nodes_andnot(nodemask_t *dstp, const nodemask_t *src1p,
#define nodes_complement(dst, src) \
__nodes_complement(&(dst), &(src), MAX_NUMNODES)
-static inline void __nodes_complement(nodemask_t *dstp,
+static __always_inline void __nodes_complement(nodemask_t *dstp,
const nodemask_t *srcp, unsigned int nbits)
{
bitmap_complement(dstp->bits, srcp->bits, nbits);
@@ -201,7 +201,7 @@ static inline void __nodes_complement(nodemask_t *dstp,
#define nodes_equal(src1, src2) \
__nodes_equal(&(src1), &(src2), MAX_NUMNODES)
-static inline bool __nodes_equal(const nodemask_t *src1p,
+static __always_inline bool __nodes_equal(const nodemask_t *src1p,
const nodemask_t *src2p, unsigned int nbits)
{
return bitmap_equal(src1p->bits, src2p->bits, nbits);
@@ -209,7 +209,7 @@ static inline bool __nodes_equal(const nodemask_t *src1p,
#define nodes_intersects(src1, src2) \
__nodes_intersects(&(src1), &(src2), MAX_NUMNODES)
-static inline bool __nodes_intersects(const nodemask_t *src1p,
+static __always_inline bool __nodes_intersects(const nodemask_t *src1p,
const nodemask_t *src2p, unsigned int nbits)
{
return bitmap_intersects(src1p->bits, src2p->bits, nbits);
@@ -217,33 +217,33 @@ static inline bool __nodes_intersects(const nodemask_t *src1p,
#define nodes_subset(src1, src2) \
__nodes_subset(&(src1), &(src2), MAX_NUMNODES)
-static inline bool __nodes_subset(const nodemask_t *src1p,
+static __always_inline bool __nodes_subset(const nodemask_t *src1p,
const nodemask_t *src2p, unsigned int nbits)
{
return bitmap_subset(src1p->bits, src2p->bits, nbits);
}
#define nodes_empty(src) __nodes_empty(&(src), MAX_NUMNODES)
-static inline bool __nodes_empty(const nodemask_t *srcp, unsigned int nbits)
+static __always_inline bool __nodes_empty(const nodemask_t *srcp, unsigned int nbits)
{
return bitmap_empty(srcp->bits, nbits);
}
#define nodes_full(nodemask) __nodes_full(&(nodemask), MAX_NUMNODES)
-static inline bool __nodes_full(const nodemask_t *srcp, unsigned int nbits)
+static __always_inline bool __nodes_full(const nodemask_t *srcp, unsigned int nbits)
{
return bitmap_full(srcp->bits, nbits);
}
#define nodes_weight(nodemask) __nodes_weight(&(nodemask), MAX_NUMNODES)
-static inline int __nodes_weight(const nodemask_t *srcp, unsigned int nbits)
+static __always_inline int __nodes_weight(const nodemask_t *srcp, unsigned int nbits)
{
return bitmap_weight(srcp->bits, nbits);
}
#define nodes_shift_right(dst, src, n) \
__nodes_shift_right(&(dst), &(src), (n), MAX_NUMNODES)
-static inline void __nodes_shift_right(nodemask_t *dstp,
+static __always_inline void __nodes_shift_right(nodemask_t *dstp,
const nodemask_t *srcp, int n, int nbits)
{
bitmap_shift_right(dstp->bits, srcp->bits, n, nbits);
@@ -251,7 +251,7 @@ static inline void __nodes_shift_right(nodemask_t *dstp,
#define nodes_shift_left(dst, src, n) \
__nodes_shift_left(&(dst), &(src), (n), MAX_NUMNODES)
-static inline void __nodes_shift_left(nodemask_t *dstp,
+static __always_inline void __nodes_shift_left(nodemask_t *dstp,
const nodemask_t *srcp, int n, int nbits)
{
bitmap_shift_left(dstp->bits, srcp->bits, n, nbits);
@@ -261,13 +261,13 @@ static inline void __nodes_shift_left(nodemask_t *dstp,
> MAX_NUMNODES, then the silly min_ts could be dropped. */
#define first_node(src) __first_node(&(src))
-static inline unsigned int __first_node(const nodemask_t *srcp)
+static __always_inline unsigned int __first_node(const nodemask_t *srcp)
{
return min_t(unsigned int, MAX_NUMNODES, find_first_bit(srcp->bits, MAX_NUMNODES));
}
#define next_node(n, src) __next_node((n), &(src))
-static inline unsigned int __next_node(int n, const nodemask_t *srcp)
+static __always_inline unsigned int __next_node(int n, const nodemask_t *srcp)
{
return min_t(unsigned int, MAX_NUMNODES, find_next_bit(srcp->bits, MAX_NUMNODES, n+1));
}
@@ -277,7 +277,7 @@ static inline unsigned int __next_node(int n, const nodemask_t *srcp)
* the first node in src if needed. Returns MAX_NUMNODES if src is empty.
*/
#define next_node_in(n, src) __next_node_in((n), &(src))
-static inline unsigned int __next_node_in(int node, const nodemask_t *srcp)
+static __always_inline unsigned int __next_node_in(int node, const nodemask_t *srcp)
{
unsigned int ret = __next_node(node, srcp);
@@ -286,7 +286,7 @@ static inline unsigned int __next_node_in(int node, const nodemask_t *srcp)
return ret;
}
-static inline void init_nodemask_of_node(nodemask_t *mask, int node)
+static __always_inline void init_nodemask_of_node(nodemask_t *mask, int node)
{
nodes_clear(*mask);
node_set(node, *mask);
@@ -304,7 +304,7 @@ static inline void init_nodemask_of_node(nodemask_t *mask, int node)
})
#define first_unset_node(mask) __first_unset_node(&(mask))
-static inline unsigned int __first_unset_node(const nodemask_t *maskp)
+static __always_inline unsigned int __first_unset_node(const nodemask_t *maskp)
{
return min_t(unsigned int, MAX_NUMNODES,
find_first_zero_bit(maskp->bits, MAX_NUMNODES));
@@ -338,21 +338,21 @@ static inline unsigned int __first_unset_node(const nodemask_t *maskp)
#define nodemask_parse_user(ubuf, ulen, dst) \
__nodemask_parse_user((ubuf), (ulen), &(dst), MAX_NUMNODES)
-static inline int __nodemask_parse_user(const char __user *buf, int len,
+static __always_inline int __nodemask_parse_user(const char __user *buf, int len,
nodemask_t *dstp, int nbits)
{
return bitmap_parse_user(buf, len, dstp->bits, nbits);
}
#define nodelist_parse(buf, dst) __nodelist_parse((buf), &(dst), MAX_NUMNODES)
-static inline int __nodelist_parse(const char *buf, nodemask_t *dstp, int nbits)
+static __always_inline int __nodelist_parse(const char *buf, nodemask_t *dstp, int nbits)
{
return bitmap_parselist(buf, dstp->bits, nbits);
}
#define node_remap(oldbit, old, new) \
__node_remap((oldbit), &(old), &(new), MAX_NUMNODES)
-static inline int __node_remap(int oldbit,
+static __always_inline int __node_remap(int oldbit,
const nodemask_t *oldp, const nodemask_t *newp, int nbits)
{
return bitmap_bitremap(oldbit, oldp->bits, newp->bits, nbits);
@@ -360,7 +360,7 @@ static inline int __node_remap(int oldbit,
#define nodes_remap(dst, src, old, new) \
__nodes_remap(&(dst), &(src), &(old), &(new), MAX_NUMNODES)
-static inline void __nodes_remap(nodemask_t *dstp, const nodemask_t *srcp,
+static __always_inline void __nodes_remap(nodemask_t *dstp, const nodemask_t *srcp,
const nodemask_t *oldp, const nodemask_t *newp, int nbits)
{
bitmap_remap(dstp->bits, srcp->bits, oldp->bits, newp->bits, nbits);
@@ -368,7 +368,7 @@ static inline void __nodes_remap(nodemask_t *dstp, const nodemask_t *srcp,
#define nodes_onto(dst, orig, relmap) \
__nodes_onto(&(dst), &(orig), &(relmap), MAX_NUMNODES)
-static inline void __nodes_onto(nodemask_t *dstp, const nodemask_t *origp,
+static __always_inline void __nodes_onto(nodemask_t *dstp, const nodemask_t *origp,
const nodemask_t *relmapp, int nbits)
{
bitmap_onto(dstp->bits, origp->bits, relmapp->bits, nbits);
@@ -376,7 +376,7 @@ static inline void __nodes_onto(nodemask_t *dstp, const nodemask_t *origp,
#define nodes_fold(dst, orig, sz) \
__nodes_fold(&(dst), &(orig), sz, MAX_NUMNODES)
-static inline void __nodes_fold(nodemask_t *dstp, const nodemask_t *origp,
+static __always_inline void __nodes_fold(nodemask_t *dstp, const nodemask_t *origp,
int sz, int nbits)
{
bitmap_fold(dstp->bits, origp->bits, sz, nbits);
@@ -418,22 +418,22 @@ enum node_states {
extern nodemask_t node_states[NR_NODE_STATES];
#if MAX_NUMNODES > 1
-static inline int node_state(int node, enum node_states state)
+static __always_inline int node_state(int node, enum node_states state)
{
return node_isset(node, node_states[state]);
}
-static inline void node_set_state(int node, enum node_states state)
+static __always_inline void node_set_state(int node, enum node_states state)
{
__node_set(node, &node_states[state]);
}
-static inline void node_clear_state(int node, enum node_states state)
+static __always_inline void node_clear_state(int node, enum node_states state)
{
__node_clear(node, &node_states[state]);
}
-static inline int num_node_state(enum node_states state)
+static __always_inline int num_node_state(enum node_states state)
{
return nodes_weight(node_states[state]);
}
@@ -443,11 +443,11 @@ static inline int num_node_state(enum node_states state)
#define first_online_node first_node(node_states[N_ONLINE])
#define first_memory_node first_node(node_states[N_MEMORY])
-static inline unsigned int next_online_node(int nid)
+static __always_inline unsigned int next_online_node(int nid)
{
return next_node(nid, node_states[N_ONLINE]);
}
-static inline unsigned int next_memory_node(int nid)
+static __always_inline unsigned int next_memory_node(int nid)
{
return next_node(nid, node_states[N_MEMORY]);
}
@@ -455,13 +455,13 @@ static inline unsigned int next_memory_node(int nid)
extern unsigned int nr_node_ids;
extern unsigned int nr_online_nodes;
-static inline void node_set_online(int nid)
+static __always_inline void node_set_online(int nid)
{
node_set_state(nid, N_ONLINE);
nr_online_nodes = num_node_state(N_ONLINE);
}
-static inline void node_set_offline(int nid)
+static __always_inline void node_set_offline(int nid)
{
node_clear_state(nid, N_ONLINE);
nr_online_nodes = num_node_state(N_ONLINE);
@@ -469,20 +469,20 @@ static inline void node_set_offline(int nid)
#else
-static inline int node_state(int node, enum node_states state)
+static __always_inline int node_state(int node, enum node_states state)
{
return node == 0;
}
-static inline void node_set_state(int node, enum node_states state)
+static __always_inline void node_set_state(int node, enum node_states state)
{
}
-static inline void node_clear_state(int node, enum node_states state)
+static __always_inline void node_clear_state(int node, enum node_states state)
{
}
-static inline int num_node_state(enum node_states state)
+static __always_inline int num_node_state(enum node_states state)
{
return 1;
}
@@ -502,7 +502,7 @@ static inline int num_node_state(enum node_states state)
#endif
-static inline int node_random(const nodemask_t *maskp)
+static __always_inline int node_random(const nodemask_t *maskp)
{
#if defined(CONFIG_NUMA) && (MAX_NUMNODES > 1)
int w, bit;
diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index d422db6eec63..7132623e4658 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -52,7 +52,7 @@ struct platform_device {
extern int platform_device_register(struct platform_device *);
extern void platform_device_unregister(struct platform_device *);
-extern struct bus_type platform_bus_type;
+extern const struct bus_type platform_bus_type;
extern struct device platform_bus;
extern struct resource *platform_get_resource(struct platform_device *,
diff --git a/include/trace/events/dma.h b/include/trace/events/dma.h
index f57f05331d73..569f86a44aaa 100644
--- a/include/trace/events/dma.h
+++ b/include/trace/events/dma.h
@@ -176,9 +176,9 @@ TRACE_EVENT(dma_free,
);
TRACE_EVENT(dma_map_sg,
- TP_PROTO(struct device *dev, struct scatterlist *sg, int nents,
+ TP_PROTO(struct device *dev, struct scatterlist *sgl, int nents,
int ents, enum dma_data_direction dir, unsigned long attrs),
- TP_ARGS(dev, sg, nents, ents, dir, attrs),
+ TP_ARGS(dev, sgl, nents, ents, dir, attrs),
TP_STRUCT__entry(
__string(device, dev_name(dev))
@@ -190,17 +190,17 @@ TRACE_EVENT(dma_map_sg,
),
TP_fast_assign(
+ struct scatterlist *sg;
int i;
__assign_str(device);
- for (i = 0; i < nents; i++)
- ((u64 *)__get_dynamic_array(phys_addrs))[i] =
- sg_phys(sg + i);
- for (i = 0; i < ents; i++) {
+ for_each_sg(sgl, sg, nents, i)
+ ((u64 *)__get_dynamic_array(phys_addrs))[i] = sg_phys(sg);
+ for_each_sg(sgl, sg, ents, i) {
((u64 *)__get_dynamic_array(dma_addrs))[i] =
- sg_dma_address(sg + i);
+ sg_dma_address(sg);
((unsigned int *)__get_dynamic_array(lengths))[i] =
- sg_dma_len(sg + i);
+ sg_dma_len(sg);
}
__entry->dir = dir;
__entry->attrs = attrs;
@@ -222,9 +222,9 @@ TRACE_EVENT(dma_map_sg,
);
TRACE_EVENT(dma_unmap_sg,
- TP_PROTO(struct device *dev, struct scatterlist *sg, int nents,
+ TP_PROTO(struct device *dev, struct scatterlist *sgl, int nents,
enum dma_data_direction dir, unsigned long attrs),
- TP_ARGS(dev, sg, nents, dir, attrs),
+ TP_ARGS(dev, sgl, nents, dir, attrs),
TP_STRUCT__entry(
__string(device, dev_name(dev))
@@ -234,12 +234,12 @@ TRACE_EVENT(dma_unmap_sg,
),
TP_fast_assign(
+ struct scatterlist *sg;
int i;
__assign_str(device);
- for (i = 0; i < nents; i++)
- ((u64 *)__get_dynamic_array(addrs))[i] =
- sg_phys(sg + i);
+ for_each_sg(sgl, sg, nents, i)
+ ((u64 *)__get_dynamic_array(addrs))[i] = sg_phys(sg);
__entry->dir = dir;
__entry->attrs = attrs;
),
@@ -290,9 +290,9 @@ DEFINE_EVENT(dma_sync_single, dma_sync_single_for_device,
TP_ARGS(dev, dma_addr, size, dir));
DECLARE_EVENT_CLASS(dma_sync_sg,
- TP_PROTO(struct device *dev, struct scatterlist *sg, int nents,
+ TP_PROTO(struct device *dev, struct scatterlist *sgl, int nents,
enum dma_data_direction dir),
- TP_ARGS(dev, sg, nents, dir),
+ TP_ARGS(dev, sgl, nents, dir),
TP_STRUCT__entry(
__string(device, dev_name(dev))
@@ -302,14 +302,15 @@ DECLARE_EVENT_CLASS(dma_sync_sg,
),
TP_fast_assign(
+ struct scatterlist *sg;
int i;
__assign_str(device);
- for (i = 0; i < nents; i++) {
+ for_each_sg(sgl, sg, nents, i) {
((u64 *)__get_dynamic_array(dma_addrs))[i] =
- sg_dma_address(sg + i);
+ sg_dma_address(sg);
((unsigned int *)__get_dynamic_array(lengths))[i] =
- sg_dma_len(sg + i);
+ sg_dma_len(sg);
}
__entry->dir = dir;
),
diff --git a/include/uapi/linux/bits.h b/include/uapi/linux/bits.h
index 3c2a101986a3..5ee30f882736 100644
--- a/include/uapi/linux/bits.h
+++ b/include/uapi/linux/bits.h
@@ -12,4 +12,7 @@
(((~_ULL(0)) - (_ULL(1) << (l)) + 1) & \
(~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h))))
+#define __GENMASK_U128(h, l) \
+ ((_BIT128((h)) << 1) - (_BIT128(l)))
+
#endif /* _UAPI_LINUX_BITS_H */
diff --git a/include/uapi/linux/const.h b/include/uapi/linux/const.h
index a429381e7ca5..e16be0d37746 100644
--- a/include/uapi/linux/const.h
+++ b/include/uapi/linux/const.h
@@ -28,6 +28,23 @@
#define _BITUL(x) (_UL(1) << (x))
#define _BITULL(x) (_ULL(1) << (x))
+#if !defined(__ASSEMBLY__)
+/*
+ * Missing asm support
+ *
+ * __BIT128() would not work in the asm code, as it shifts an
+ * 'unsigned __init128' data type as direct representation of
+ * 128 bit constants is not supported in the gcc compiler, as
+ * they get silently truncated.
+ *
+ * TODO: Please revisit this implementation when gcc compiler
+ * starts representing 128 bit constants directly like long
+ * and unsigned long etc. Subsequently drop the comment for
+ * GENMASK_U128() which would then start supporting asm code.
+ */
+#define _BIT128(x) ((unsigned __int128)(1) << (x))
+#endif
+
#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (__typeof__(x))(a) - 1)
#define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask))
diff --git a/include/uapi/xen/privcmd.h b/include/uapi/xen/privcmd.h
index 8b8c5d1420fe..8e2c8fd44764 100644
--- a/include/uapi/xen/privcmd.h
+++ b/include/uapi/xen/privcmd.h
@@ -126,6 +126,11 @@ struct privcmd_ioeventfd {
__u8 pad[2];
};
+struct privcmd_pcidev_get_gsi {
+ __u32 sbdf;
+ __u32 gsi;
+};
+
/*
* @cmd: IOCTL_PRIVCMD_HYPERCALL
* @arg: &privcmd_hypercall_t
@@ -157,5 +162,7 @@ struct privcmd_ioeventfd {
_IOW('P', 8, struct privcmd_irqfd)
#define IOCTL_PRIVCMD_IOEVENTFD \
_IOW('P', 9, struct privcmd_ioeventfd)
+#define IOCTL_PRIVCMD_PCIDEV_GET_GSI \
+ _IOC(_IOC_NONE, 'P', 10, sizeof(struct privcmd_pcidev_get_gsi))
#endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
diff --git a/include/xen/acpi.h b/include/xen/acpi.h
index b1e11863144d..daa96a22d257 100644
--- a/include/xen/acpi.h
+++ b/include/xen/acpi.h
@@ -67,10 +67,37 @@ static inline void xen_acpi_sleep_register(void)
acpi_suspend_lowlevel = xen_acpi_suspend_lowlevel;
}
}
+int xen_pvh_setup_gsi(int gsi, int trigger, int polarity);
+int xen_acpi_get_gsi_info(struct pci_dev *dev,
+ int *gsi_out,
+ int *trigger_out,
+ int *polarity_out);
#else
static inline void xen_acpi_sleep_register(void)
{
}
+
+static inline int xen_pvh_setup_gsi(int gsi, int trigger, int polarity)
+{
+ return -1;
+}
+
+static inline int xen_acpi_get_gsi_info(struct pci_dev *dev,
+ int *gsi_out,
+ int *trigger_out,
+ int *polarity_out)
+{
+ return -1;
+}
+#endif
+
+#ifdef CONFIG_XEN_PCI_STUB
+int pcistub_get_gsi_from_sbdf(unsigned int sbdf);
+#else
+static inline int pcistub_get_gsi_from_sbdf(unsigned int sbdf)
+{
+ return -1;
+}
#endif
#endif /* _XEN_ACPI_H */
diff --git a/include/xen/interface/elfnote.h b/include/xen/interface/elfnote.h
index 38deb1214613..918f47d87d7a 100644
--- a/include/xen/interface/elfnote.h
+++ b/include/xen/interface/elfnote.h
@@ -11,7 +11,9 @@
#define __XEN_PUBLIC_ELFNOTE_H__
/*
- * The notes should live in a SHT_NOTE segment and have "Xen" in the
+ * `incontents 200 elfnotes ELF notes
+ *
+ * The notes should live in a PT_NOTE segment and have "Xen" in the
* name field.
*
* Numeric types are either 4 or 8 bytes depending on the content of
@@ -22,6 +24,8 @@
*
* String values (for non-legacy) are NULL terminated ASCII, also known
* as ASCIZ type.
+ *
+ * Xen only uses ELF Notes contained in x86 binaries.
*/
/*
@@ -52,7 +56,7 @@
#define XEN_ELFNOTE_VIRT_BASE 3
/*
- * The offset of the ELF paddr field from the acutal required
+ * The offset of the ELF paddr field from the actual required
* pseudo-physical address (numeric).
*
* This is used to maintain backwards compatibility with older kernels
@@ -92,7 +96,12 @@
#define XEN_ELFNOTE_LOADER 8
/*
- * The kernel supports PAE (x86/32 only, string = "yes" or "no").
+ * The kernel supports PAE (x86/32 only, string = "yes", "no" or
+ * "bimodal").
+ *
+ * For compatibility with Xen 3.0.3 and earlier the "bimodal" setting
+ * may be given as "yes,bimodal" which will cause older Xen to treat
+ * this kernel as PAE.
*
* LEGACY: PAE (n.b. The legacy interface included a provision to
* indicate 'extended-cr3' support allowing L3 page tables to be
@@ -149,7 +158,9 @@
* The (non-default) location the initial phys-to-machine map should be
* placed at by the hypervisor (Dom0) or the tools (DomU).
* The kernel must be prepared for this mapping to be established using
- * large pages, despite such otherwise not being available to guests.
+ * large pages, despite such otherwise not being available to guests. Note
+ * that these large pages may be misaligned in PFN space (they'll obviously
+ * be aligned in MFN and virtual address spaces).
* The kernel must also be able to handle the page table pages used for
* this mapping not being accessible through the initial mapping.
* (Only x86-64 supports this at present.)
@@ -186,8 +197,80 @@
#define XEN_ELFNOTE_PHYS32_ENTRY 18
/*
+ * Physical loading constraints for PVH kernels
+ *
+ * The presence of this note indicates the kernel supports relocating itself.
+ *
+ * The note may include up to three 32bit values to place constraints on the
+ * guest physical loading addresses and alignment for a PVH kernel. Values
+ * are read in the following order:
+ * - a required start alignment (default 0x200000)
+ * - a minimum address for the start of the image (default 0; see below)
+ * - a maximum address for the last byte of the image (default 0xffffffff)
+ *
+ * When this note specifies an alignment value, it is used. Otherwise the
+ * maximum p_align value from loadable ELF Program Headers is used, if it is
+ * greater than or equal to 4k (0x1000). Otherwise, the default is used.
+ */
+#define XEN_ELFNOTE_PHYS32_RELOC 19
+
+/*
* The number of the highest elfnote defined.
*/
-#define XEN_ELFNOTE_MAX XEN_ELFNOTE_PHYS32_ENTRY
+#define XEN_ELFNOTE_MAX XEN_ELFNOTE_PHYS32_RELOC
+
+/*
+ * System information exported through crash notes.
+ *
+ * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_INFO
+ * note in case of a system crash. This note will contain various
+ * information about the system, see xen/include/xen/elfcore.h.
+ */
+#define XEN_ELFNOTE_CRASH_INFO 0x1000001
+
+/*
+ * System registers exported through crash notes.
+ *
+ * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_REGS
+ * note per cpu in case of a system crash. This note is architecture
+ * specific and will contain registers not saved in the "CORE" note.
+ * See xen/include/xen/elfcore.h for more information.
+ */
+#define XEN_ELFNOTE_CRASH_REGS 0x1000002
+
+
+/*
+ * xen dump-core none note.
+ * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_NONE
+ * in its dump file to indicate that the file is xen dump-core
+ * file. This note doesn't have any other information.
+ * See tools/libxc/xc_core.h for more information.
+ */
+#define XEN_ELFNOTE_DUMPCORE_NONE 0x2000000
+
+/*
+ * xen dump-core header note.
+ * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_HEADER
+ * in its dump file.
+ * See tools/libxc/xc_core.h for more information.
+ */
+#define XEN_ELFNOTE_DUMPCORE_HEADER 0x2000001
+
+/*
+ * xen dump-core xen version note.
+ * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_XEN_VERSION
+ * in its dump file. It contains the xen version obtained via the
+ * XENVER hypercall.
+ * See tools/libxc/xc_core.h for more information.
+ */
+#define XEN_ELFNOTE_DUMPCORE_XEN_VERSION 0x2000002
+
+/*
+ * xen dump-core format version note.
+ * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION
+ * in its dump file. It contains a format version identifier.
+ * See tools/libxc/xc_core.h for more information.
+ */
+#define XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION 0x2000003
#endif /* __XEN_PUBLIC_ELFNOTE_H__ */
diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h
index a237af867873..df74e65a884b 100644
--- a/include/xen/interface/physdev.h
+++ b/include/xen/interface/physdev.h
@@ -256,6 +256,13 @@ struct physdev_pci_device_add {
*/
#define PHYSDEVOP_prepare_msix 30
#define PHYSDEVOP_release_msix 31
+/*
+ * Notify the hypervisor that a PCI device has been reset, so that any
+ * internally cached state is regenerated. Should be called after any
+ * device reset performed by the hardware domain.
+ */
+#define PHYSDEVOP_pci_device_reset 32
+
struct physdev_pci_device {
/* IN */
uint16_t seg;
@@ -263,6 +270,16 @@ struct physdev_pci_device {
uint8_t devfn;
};
+struct pci_device_reset {
+ struct physdev_pci_device dev;
+#define PCI_DEVICE_RESET_COLD 0x0
+#define PCI_DEVICE_RESET_WARM 0x1
+#define PCI_DEVICE_RESET_HOT 0x2
+#define PCI_DEVICE_RESET_FLR 0x3
+#define PCI_DEVICE_RESET_MASK 0x3
+ uint32_t flags;
+};
+
#define PHYSDEVOP_DBGP_RESET_PREPARE 1
#define PHYSDEVOP_DBGP_RESET_DONE 2
diff --git a/include/xen/pci.h b/include/xen/pci.h
index b8337cf85fd1..424b8ea89ca8 100644
--- a/include/xen/pci.h
+++ b/include/xen/pci.h
@@ -4,10 +4,16 @@
#define __XEN_PCI_H__
#if defined(CONFIG_XEN_DOM0)
+int xen_reset_device(const struct pci_dev *dev);
int xen_find_device_domain_owner(struct pci_dev *dev);
int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain);
int xen_unregister_device_domain_owner(struct pci_dev *dev);
#else
+static inline int xen_reset_device(const struct pci_dev *dev)
+{
+ return -1;
+}
+
static inline int xen_find_device_domain_owner(struct pci_dev *dev)
{
return -1;
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 112581cf97e7..106735145948 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -283,7 +283,6 @@ static int iter_release(struct inode *inode, struct file *file)
const struct file_operations bpf_iter_fops = {
.open = iter_open,
- .llseek = no_llseek,
.read = bpf_seq_read,
.release = iter_release,
};
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 5a8071c45c80..e3589c4287cb 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6821,7 +6821,6 @@ static int perf_fasync(int fd, struct file *filp, int on)
}
static const struct file_operations perf_fops = {
- .llseek = no_llseek,
.release = perf_release,
.read = perf_read,
.poll = perf_poll,
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 6dc76b590703..93a822d3c468 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -168,7 +168,7 @@ bool static_key_slow_inc_cpuslocked(struct static_key *key)
jump_label_update(key);
/*
* Ensure that when static_key_fast_inc_not_disabled() or
- * static_key_slow_try_dec() observe the positive value,
+ * static_key_dec_not_one() observe the positive value,
* they must also observe all the text changes.
*/
atomic_set_release(&key->enabled, 1);
@@ -250,7 +250,7 @@ void static_key_disable(struct static_key *key)
}
EXPORT_SYMBOL_GPL(static_key_disable);
-static bool static_key_slow_try_dec(struct static_key *key)
+static bool static_key_dec_not_one(struct static_key *key)
{
int v;
@@ -274,6 +274,14 @@ static bool static_key_slow_try_dec(struct static_key *key)
* enabled. This suggests an ordering problem on the user side.
*/
WARN_ON_ONCE(v < 0);
+
+ /*
+ * Warn about underflow, and lie about success in an attempt to
+ * not make things worse.
+ */
+ if (WARN_ON_ONCE(v == 0))
+ return true;
+
if (v <= 1)
return false;
} while (!likely(atomic_try_cmpxchg(&key->enabled, &v, v - 1)));
@@ -284,15 +292,27 @@ static bool static_key_slow_try_dec(struct static_key *key)
static void __static_key_slow_dec_cpuslocked(struct static_key *key)
{
lockdep_assert_cpus_held();
+ int val;
- if (static_key_slow_try_dec(key))
+ if (static_key_dec_not_one(key))
return;
guard(mutex)(&jump_label_mutex);
- if (atomic_cmpxchg(&key->enabled, 1, 0) == 1)
+ val = atomic_read(&key->enabled);
+ /*
+ * It should be impossible to observe -1 with jump_label_mutex held,
+ * see static_key_slow_inc_cpuslocked().
+ */
+ if (WARN_ON_ONCE(val == -1))
+ return;
+ /*
+ * Cannot already be 0, something went sideways.
+ */
+ if (WARN_ON_ONCE(val == 0))
+ return;
+
+ if (atomic_dec_and_test(&key->enabled))
jump_label_update(key);
- else
- WARN_ON_ONCE(!static_key_slow_try_dec(key));
}
static void __static_key_slow_dec(struct static_key *key)
@@ -329,7 +349,7 @@ void __static_key_slow_dec_deferred(struct static_key *key,
{
STATIC_KEY_CHECK_USE(key);
- if (static_key_slow_try_dec(key))
+ if (static_key_dec_not_one(key))
return;
schedule_delayed_work(work, timeout);
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 7963deac33c3..536bd471557f 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -788,7 +788,7 @@ static void lockdep_print_held_locks(struct task_struct *p)
printk("no locks held by %s/%d.\n", p->comm, task_pid_nr(p));
else
printk("%d lock%s held by %s/%d:\n", depth,
- depth > 1 ? "s" : "", p->comm, task_pid_nr(p));
+ str_plural(depth), p->comm, task_pid_nr(p));
/*
* It's not reliable to print a task's held locks if it's not sleeping
* and it's not the current task.
@@ -2084,6 +2084,9 @@ static noinline void print_bfs_bug(int ret)
/*
* Breadth-first-search failed, graph got corrupted?
*/
+ if (ret == BFS_EQUEUEFULL)
+ pr_warn("Increase LOCKDEP_CIRCULAR_QUEUE_BITS to avoid this warning:\n");
+
WARN(1, "lockdep bfs error:%d\n", ret);
}
@@ -6263,25 +6266,27 @@ static struct pending_free *get_pending_free(void)
static void free_zapped_rcu(struct rcu_head *cb);
/*
- * Schedule an RCU callback if no RCU callback is pending. Must be called with
- * the graph lock held.
- */
-static void call_rcu_zapped(struct pending_free *pf)
+* See if we need to queue an RCU callback, must called with
+* the lockdep lock held, returns false if either we don't have
+* any pending free or the callback is already scheduled.
+* Otherwise, a call_rcu() must follow this function call.
+*/
+static bool prepare_call_rcu_zapped(struct pending_free *pf)
{
WARN_ON_ONCE(inside_selftest());
if (list_empty(&pf->zapped))
- return;
+ return false;
if (delayed_free.scheduled)
- return;
+ return false;
delayed_free.scheduled = true;
WARN_ON_ONCE(delayed_free.pf + delayed_free.index != pf);
delayed_free.index ^= 1;
- call_rcu(&delayed_free.rcu_head, free_zapped_rcu);
+ return true;
}
/* The caller must hold the graph lock. May be called from RCU context. */
@@ -6307,6 +6312,7 @@ static void free_zapped_rcu(struct rcu_head *ch)
{
struct pending_free *pf;
unsigned long flags;
+ bool need_callback;
if (WARN_ON_ONCE(ch != &delayed_free.rcu_head))
return;
@@ -6318,14 +6324,18 @@ static void free_zapped_rcu(struct rcu_head *ch)
pf = delayed_free.pf + (delayed_free.index ^ 1);
__free_zapped_classes(pf);
delayed_free.scheduled = false;
+ need_callback =
+ prepare_call_rcu_zapped(delayed_free.pf + delayed_free.index);
+ lockdep_unlock();
+ raw_local_irq_restore(flags);
/*
- * If there's anything on the open list, close and start a new callback.
- */
- call_rcu_zapped(delayed_free.pf + delayed_free.index);
+ * If there's pending free and its callback has not been scheduled,
+ * queue an RCU callback.
+ */
+ if (need_callback)
+ call_rcu(&delayed_free.rcu_head, free_zapped_rcu);
- lockdep_unlock();
- raw_local_irq_restore(flags);
}
/*
@@ -6365,6 +6375,7 @@ static void lockdep_free_key_range_reg(void *start, unsigned long size)
{
struct pending_free *pf;
unsigned long flags;
+ bool need_callback;
init_data_structures_once();
@@ -6372,10 +6383,11 @@ static void lockdep_free_key_range_reg(void *start, unsigned long size)
lockdep_lock();
pf = get_pending_free();
__lockdep_free_key_range(pf, start, size);
- call_rcu_zapped(pf);
+ need_callback = prepare_call_rcu_zapped(pf);
lockdep_unlock();
raw_local_irq_restore(flags);
-
+ if (need_callback)
+ call_rcu(&delayed_free.rcu_head, free_zapped_rcu);
/*
* Wait for any possible iterators from look_up_lock_class() to pass
* before continuing to free the memory they refer to.
@@ -6469,6 +6481,7 @@ static void lockdep_reset_lock_reg(struct lockdep_map *lock)
struct pending_free *pf;
unsigned long flags;
int locked;
+ bool need_callback = false;
raw_local_irq_save(flags);
locked = graph_lock();
@@ -6477,11 +6490,13 @@ static void lockdep_reset_lock_reg(struct lockdep_map *lock)
pf = get_pending_free();
__lockdep_reset_lock(pf, lock);
- call_rcu_zapped(pf);
+ need_callback = prepare_call_rcu_zapped(pf);
graph_unlock();
out_irq:
raw_local_irq_restore(flags);
+ if (need_callback)
+ call_rcu(&delayed_free.rcu_head, free_zapped_rcu);
}
/*
@@ -6525,6 +6540,7 @@ void lockdep_unregister_key(struct lock_class_key *key)
struct pending_free *pf;
unsigned long flags;
bool found = false;
+ bool need_callback = false;
might_sleep();
@@ -6545,11 +6561,14 @@ void lockdep_unregister_key(struct lock_class_key *key)
if (found) {
pf = get_pending_free();
__lockdep_free_key_range(pf, key, 1);
- call_rcu_zapped(pf);
+ need_callback = prepare_call_rcu_zapped(pf);
}
lockdep_unlock();
raw_local_irq_restore(flags);
+ if (need_callback)
+ call_rcu(&delayed_free.rcu_head, free_zapped_rcu);
+
/* Wait until is_dynamic_key() has finished accessing k->hash_entry. */
synchronize_rcu();
}
diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c
index e2bfb1db589d..6db0f43fc4df 100644
--- a/kernel/locking/lockdep_proc.c
+++ b/kernel/locking/lockdep_proc.c
@@ -424,7 +424,7 @@ static void seq_line(struct seq_file *m, char c, int offset, int length)
for (i = 0; i < offset; i++)
seq_puts(m, " ");
for (i = 0; i < length; i++)
- seq_printf(m, "%c", c);
+ seq_putc(m, c);
seq_puts(m, "\n");
}
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 5ded7dff46ef..2bbb6eca5144 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -181,12 +181,21 @@ static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
__rwsem_set_reader_owned(sem, current);
}
+#ifdef CONFIG_DEBUG_RWSEMS
+/*
+ * Return just the real task structure pointer of the owner
+ */
+static inline struct task_struct *rwsem_owner(struct rw_semaphore *sem)
+{
+ return (struct task_struct *)
+ (atomic_long_read(&sem->owner) & ~RWSEM_OWNER_FLAGS_MASK);
+}
+
/*
* Return true if the rwsem is owned by a reader.
*/
static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem)
{
-#ifdef CONFIG_DEBUG_RWSEMS
/*
* Check the count to see if it is write-locked.
*/
@@ -194,11 +203,9 @@ static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem)
if (count & RWSEM_WRITER_MASK)
return false;
-#endif
return rwsem_test_oflags(sem, RWSEM_READER_OWNED);
}
-#ifdef CONFIG_DEBUG_RWSEMS
/*
* With CONFIG_DEBUG_RWSEMS configured, it will make sure that if there
* is a task pointer in owner of a reader-owned rwsem, it will be the
@@ -266,15 +273,6 @@ static inline bool rwsem_write_trylock(struct rw_semaphore *sem)
}
/*
- * Return just the real task structure pointer of the owner
- */
-static inline struct task_struct *rwsem_owner(struct rw_semaphore *sem)
-{
- return (struct task_struct *)
- (atomic_long_read(&sem->owner) & ~RWSEM_OWNER_FLAGS_MASK);
-}
-
-/*
* Return the real task structure pointer of the owner and the embedded
* flags in the owner. pflags must be non-NULL.
*/
diff --git a/kernel/module/Kconfig b/kernel/module/Kconfig
index 05a9a06a140c..7c6588148d42 100644
--- a/kernel/module/Kconfig
+++ b/kernel/module/Kconfig
@@ -229,7 +229,7 @@ comment "Do not forget to sign required modules with scripts/sign-file"
depends on MODULE_SIG_FORCE && !MODULE_SIG_ALL
choice
- prompt "Which hash algorithm should modules be signed with?"
+ prompt "Hash algorithm to sign modules"
depends on MODULE_SIG || IMA_APPRAISE_MODSIG
help
This determines which sort of hashing algorithm will be used during
@@ -239,31 +239,31 @@ choice
the signature on that module.
config MODULE_SIG_SHA1
- bool "Sign modules with SHA-1"
+ bool "SHA-1"
select CRYPTO_SHA1
config MODULE_SIG_SHA256
- bool "Sign modules with SHA-256"
+ bool "SHA-256"
select CRYPTO_SHA256
config MODULE_SIG_SHA384
- bool "Sign modules with SHA-384"
+ bool "SHA-384"
select CRYPTO_SHA512
config MODULE_SIG_SHA512
- bool "Sign modules with SHA-512"
+ bool "SHA-512"
select CRYPTO_SHA512
config MODULE_SIG_SHA3_256
- bool "Sign modules with SHA3-256"
+ bool "SHA3-256"
select CRYPTO_SHA3
config MODULE_SIG_SHA3_384
- bool "Sign modules with SHA3-384"
+ bool "SHA3-384"
select CRYPTO_SHA3
config MODULE_SIG_SHA3_512
- bool "Sign modules with SHA3-512"
+ bool "SHA3-512"
select CRYPTO_SHA3
endchoice
@@ -279,64 +279,65 @@ config MODULE_SIG_HASH
default "sha3-384" if MODULE_SIG_SHA3_384
default "sha3-512" if MODULE_SIG_SHA3_512
-choice
- prompt "Module compression mode"
+config MODULE_COMPRESS
+ bool "Module compression"
help
- This option allows you to choose the algorithm which will be used to
- compress modules when 'make modules_install' is run. (or, you can
- choose to not compress modules at all.)
-
- External modules will also be compressed in the same way during the
- installation.
-
- For modules inside an initrd or initramfs, it's more efficient to
- compress the whole initrd or initramfs instead.
-
+ Enable module compression to reduce on-disk size of module binaries.
This is fully compatible with signed modules.
- Please note that the tool used to load modules needs to support the
- corresponding algorithm. module-init-tools MAY support gzip, and kmod
- MAY support gzip, xz and zstd.
+ The tool used to work with modules needs to support the selected
+ compression type. kmod MAY support gzip, xz and zstd. Other tools
+ might have a limited selection of the supported types.
- Your build system needs to provide the appropriate compression tool
- to compress the modules.
+ Note that for modules inside an initrd or initramfs, it's more
+ efficient to compress the whole ramdisk instead.
- If in doubt, select 'None'.
+ If unsure, say N.
-config MODULE_COMPRESS_NONE
- bool "None"
+choice
+ prompt "Module compression type"
+ depends on MODULE_COMPRESS
help
- Do not compress modules. The installed modules are suffixed
- with .ko.
+ Choose the supported algorithm for module compression.
config MODULE_COMPRESS_GZIP
bool "GZIP"
help
- Compress modules with GZIP. The installed modules are suffixed
- with .ko.gz.
+ Support modules compressed with GZIP. The installed modules are
+ suffixed with .ko.gz.
config MODULE_COMPRESS_XZ
bool "XZ"
help
- Compress modules with XZ. The installed modules are suffixed
- with .ko.xz.
+ Support modules compressed with XZ. The installed modules are
+ suffixed with .ko.xz.
config MODULE_COMPRESS_ZSTD
bool "ZSTD"
help
- Compress modules with ZSTD. The installed modules are suffixed
- with .ko.zst.
+ Support modules compressed with ZSTD. The installed modules are
+ suffixed with .ko.zst.
endchoice
+config MODULE_COMPRESS_ALL
+ bool "Automatically compress all modules"
+ default y
+ depends on MODULE_COMPRESS
+ help
+ Compress all modules during 'make modules_install'.
+
+ Your build system needs to provide the appropriate compression tool
+ for the selected compression type. External modules will also be
+ compressed in the same way during the installation.
+
config MODULE_DECOMPRESS
bool "Support in-kernel module decompression"
- depends on MODULE_COMPRESS_GZIP || MODULE_COMPRESS_XZ || MODULE_COMPRESS_ZSTD
+ depends on MODULE_COMPRESS
select ZLIB_INFLATE if MODULE_COMPRESS_GZIP
select XZ_DEC if MODULE_COMPRESS_XZ
select ZSTD_DECOMPRESS if MODULE_COMPRESS_ZSTD
help
-
Support for decompressing kernel modules by the kernel itself
instead of relying on userspace to perform this task. Useful when
load pinning security policy is enabled.
diff --git a/kernel/module/debug_kmemleak.c b/kernel/module/debug_kmemleak.c
index 12a569d361e8..b4cc03842d70 100644
--- a/kernel/module/debug_kmemleak.c
+++ b/kernel/module/debug_kmemleak.c
@@ -12,19 +12,9 @@
void kmemleak_load_module(const struct module *mod,
const struct load_info *info)
{
- unsigned int i;
-
- /* only scan the sections containing data */
- kmemleak_scan_area(mod, sizeof(struct module), GFP_KERNEL);
-
- for (i = 1; i < info->hdr->e_shnum; i++) {
- /* Scan all writable sections that's not executable */
- if (!(info->sechdrs[i].sh_flags & SHF_ALLOC) ||
- !(info->sechdrs[i].sh_flags & SHF_WRITE) ||
- (info->sechdrs[i].sh_flags & SHF_EXECINSTR))
- continue;
-
- kmemleak_scan_area((void *)info->sechdrs[i].sh_addr,
- info->sechdrs[i].sh_size, GFP_KERNEL);
+ /* only scan writable, non-executable sections */
+ for_each_mod_mem_type(type) {
+ if (type != MOD_DATA && type != MOD_INIT_DATA)
+ kmemleak_no_scan(mod->mem[type].base);
}
}
diff --git a/kernel/module/sysfs.c b/kernel/module/sysfs.c
index 26efe1305c12..456358e1fdc4 100644
--- a/kernel/module/sysfs.c
+++ b/kernel/module/sysfs.c
@@ -69,12 +69,13 @@ static void free_sect_attrs(struct module_sect_attrs *sect_attrs)
kfree(sect_attrs);
}
-static void add_sect_attrs(struct module *mod, const struct load_info *info)
+static int add_sect_attrs(struct module *mod, const struct load_info *info)
{
unsigned int nloaded = 0, i, size[2];
struct module_sect_attrs *sect_attrs;
struct module_sect_attr *sattr;
struct bin_attribute **gattr;
+ int ret;
/* Count loaded sections and allocate structures */
for (i = 0; i < info->hdr->e_shnum; i++)
@@ -85,7 +86,7 @@ static void add_sect_attrs(struct module *mod, const struct load_info *info)
size[1] = (nloaded + 1) * sizeof(sect_attrs->grp.bin_attrs[0]);
sect_attrs = kzalloc(size[0] + size[1], GFP_KERNEL);
if (!sect_attrs)
- return;
+ return -ENOMEM;
/* Setup section attributes. */
sect_attrs->grp.name = "sections";
@@ -103,8 +104,10 @@ static void add_sect_attrs(struct module *mod, const struct load_info *info)
sattr->address = sec->sh_addr;
sattr->battr.attr.name =
kstrdup(info->secstrings + sec->sh_name, GFP_KERNEL);
- if (!sattr->battr.attr.name)
+ if (!sattr->battr.attr.name) {
+ ret = -ENOMEM;
goto out;
+ }
sect_attrs->nsections++;
sattr->battr.read = module_sect_read;
sattr->battr.size = MODULE_SECT_READ_SIZE;
@@ -113,13 +116,15 @@ static void add_sect_attrs(struct module *mod, const struct load_info *info)
}
*gattr = NULL;
- if (sysfs_create_group(&mod->mkobj.kobj, &sect_attrs->grp))
+ ret = sysfs_create_group(&mod->mkobj.kobj, &sect_attrs->grp);
+ if (ret)
goto out;
mod->sect_attrs = sect_attrs;
- return;
+ return 0;
out:
free_sect_attrs(sect_attrs);
+ return ret;
}
static void remove_sect_attrs(struct module *mod)
@@ -158,15 +163,12 @@ static void free_notes_attrs(struct module_notes_attrs *notes_attrs,
kfree(notes_attrs);
}
-static void add_notes_attrs(struct module *mod, const struct load_info *info)
+static int add_notes_attrs(struct module *mod, const struct load_info *info)
{
unsigned int notes, loaded, i;
struct module_notes_attrs *notes_attrs;
struct bin_attribute *nattr;
-
- /* failed to create section attributes, so can't create notes */
- if (!mod->sect_attrs)
- return;
+ int ret;
/* Count notes sections and allocate structures. */
notes = 0;
@@ -176,12 +178,12 @@ static void add_notes_attrs(struct module *mod, const struct load_info *info)
++notes;
if (notes == 0)
- return;
+ return 0;
notes_attrs = kzalloc(struct_size(notes_attrs, attrs, notes),
GFP_KERNEL);
if (!notes_attrs)
- return;
+ return -ENOMEM;
notes_attrs->notes = notes;
nattr = &notes_attrs->attrs[0];
@@ -201,19 +203,23 @@ static void add_notes_attrs(struct module *mod, const struct load_info *info)
}
notes_attrs->dir = kobject_create_and_add("notes", &mod->mkobj.kobj);
- if (!notes_attrs->dir)
+ if (!notes_attrs->dir) {
+ ret = -ENOMEM;
goto out;
+ }
- for (i = 0; i < notes; ++i)
- if (sysfs_create_bin_file(notes_attrs->dir,
- &notes_attrs->attrs[i]))
+ for (i = 0; i < notes; ++i) {
+ ret = sysfs_create_bin_file(notes_attrs->dir, &notes_attrs->attrs[i]);
+ if (ret)
goto out;
+ }
mod->notes_attrs = notes_attrs;
- return;
+ return 0;
out:
free_notes_attrs(notes_attrs, i);
+ return ret;
}
static void remove_notes_attrs(struct module *mod)
@@ -223,9 +229,15 @@ static void remove_notes_attrs(struct module *mod)
}
#else /* !CONFIG_KALLSYMS */
-static inline void add_sect_attrs(struct module *mod, const struct load_info *info) { }
+static inline int add_sect_attrs(struct module *mod, const struct load_info *info)
+{
+ return 0;
+}
static inline void remove_sect_attrs(struct module *mod) { }
-static inline void add_notes_attrs(struct module *mod, const struct load_info *info) { }
+static inline int add_notes_attrs(struct module *mod, const struct load_info *info)
+{
+ return 0;
+}
static inline void remove_notes_attrs(struct module *mod) { }
#endif /* CONFIG_KALLSYMS */
@@ -385,11 +397,20 @@ int mod_sysfs_setup(struct module *mod,
if (err)
goto out_unreg_modinfo_attrs;
- add_sect_attrs(mod, info);
- add_notes_attrs(mod, info);
+ err = add_sect_attrs(mod, info);
+ if (err)
+ goto out_del_usage_links;
+
+ err = add_notes_attrs(mod, info);
+ if (err)
+ goto out_unreg_sect_attrs;
return 0;
+out_unreg_sect_attrs:
+ remove_sect_attrs(mod);
+out_del_usage_links:
+ del_usage_links(mod);
out_unreg_modinfo_attrs:
module_remove_modinfo_attrs(mod, -1);
out_unreg_param:
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 3aa41ba22129..3f9e3efb9f6e 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -447,7 +447,6 @@ static const struct file_operations snapshot_fops = {
.release = snapshot_release,
.read = snapshot_read,
.write = snapshot_write,
- .llseek = no_llseek,
.unlocked_ioctl = snapshot_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = snapshot_compat_ioctl,
diff --git a/kernel/relay.c b/kernel/relay.c
index a8e90e98bf2c..a8ae436dc77e 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -1079,7 +1079,6 @@ const struct file_operations relay_file_operations = {
.poll = relay_file_poll,
.mmap = relay_file_mmap,
.read = relay_file_read,
- .llseek = no_llseek,
.release = relay_file_release,
};
EXPORT_SYMBOL_GPL(relay_file_operations);
diff --git a/kernel/static_call_inline.c b/kernel/static_call_inline.c
index 639397b5491c..5259cda486d0 100644
--- a/kernel/static_call_inline.c
+++ b/kernel/static_call_inline.c
@@ -411,6 +411,17 @@ static void static_call_del_module(struct module *mod)
for (site = start; site < stop; site++) {
key = static_call_key(site);
+
+ /*
+ * If the key was not updated due to a memory allocation
+ * failure in __static_call_init() then treating key::sites
+ * as key::mods in the code below would cause random memory
+ * access and #GP. In that case all subsequent sites have
+ * not been touched either, so stop iterating.
+ */
+ if (!static_call_key_has_mods(key))
+ break;
+
if (key == prev_key)
continue;
@@ -442,7 +453,7 @@ static int static_call_module_notify(struct notifier_block *nb,
case MODULE_STATE_COMING:
ret = static_call_add_module(mod);
if (ret) {
- WARN(1, "Failed to allocate memory for static calls");
+ pr_warn("Failed to allocate memory for static calls\n");
static_call_del_module(mod);
}
break;
diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c
index 4782edcbe7b9..c2f3d0c490d5 100644
--- a/kernel/time/posix-clock.c
+++ b/kernel/time/posix-clock.c
@@ -168,7 +168,6 @@ static int posix_clock_release(struct inode *inode, struct file *fp)
static const struct file_operations posix_clock_file_operations = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.read = posix_clock_read,
.poll = posix_clock_poll,
.unlocked_ioctl = posix_clock_ioctl,
diff --git a/kernel/trace/rv/rv.c b/kernel/trace/rv/rv.c
index df0745a42a3f..dc819aec43e8 100644
--- a/kernel/trace/rv/rv.c
+++ b/kernel/trace/rv/rv.c
@@ -306,7 +306,6 @@ static ssize_t monitor_enable_write_data(struct file *filp, const char __user *u
static const struct file_operations interface_enable_fops = {
.open = simple_open,
- .llseek = no_llseek,
.write = monitor_enable_write_data,
.read = monitor_enable_read_data,
};
@@ -329,7 +328,6 @@ static ssize_t monitor_desc_read_data(struct file *filp, char __user *user_buf,
static const struct file_operations interface_desc_fops = {
.open = simple_open,
- .llseek = no_llseek,
.read = monitor_desc_read_data,
};
@@ -674,7 +672,6 @@ static ssize_t monitoring_on_write_data(struct file *filp, const char __user *us
static const struct file_operations monitoring_on_fops = {
.open = simple_open,
- .llseek = no_llseek,
.write = monitoring_on_write_data,
.read = monitoring_on_read_data,
};
diff --git a/kernel/trace/rv/rv_reactors.c b/kernel/trace/rv/rv_reactors.c
index 6aae106695b6..7b49cbe388d4 100644
--- a/kernel/trace/rv/rv_reactors.c
+++ b/kernel/trace/rv/rv_reactors.c
@@ -426,7 +426,6 @@ static ssize_t reacting_on_write_data(struct file *filp, const char __user *user
static const struct file_operations reacting_on_fops = {
.open = simple_open,
- .llseek = no_llseek,
.write = reacting_on_write_data,
.read = reacting_on_read_data,
};
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index b4f348b4653f..c01375adc471 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -7557,7 +7557,6 @@ static const struct file_operations tracing_pipe_fops = {
.read = tracing_read_pipe,
.splice_read = tracing_splice_read_pipe,
.release = tracing_release_pipe,
- .llseek = no_llseek,
};
static const struct file_operations tracing_entries_fops = {
@@ -7636,7 +7635,6 @@ static const struct file_operations snapshot_raw_fops = {
.read = tracing_buffers_read,
.release = tracing_buffers_release,
.splice_read = tracing_buffers_splice_read,
- .llseek = no_llseek,
};
#endif /* CONFIG_TRACER_SNAPSHOT */
@@ -8466,7 +8464,6 @@ static const struct file_operations tracing_buffers_fops = {
.flush = tracing_buffers_flush,
.splice_read = tracing_buffers_splice_read,
.unlocked_ioctl = tracing_buffers_ioctl,
- .llseek = no_llseek,
.mmap = tracing_buffers_mmap,
};
diff --git a/lib/list-test.c b/lib/list-test.c
index 4f3dc75baec1..e207c4c98d70 100644
--- a/lib/list-test.c
+++ b/lib/list-test.c
@@ -408,13 +408,10 @@ static void list_test_list_cut_position(struct kunit *test)
KUNIT_EXPECT_EQ(test, i, 2);
- i = 0;
list_for_each(cur, &list1) {
KUNIT_EXPECT_PTR_EQ(test, cur, &entries[i]);
i++;
}
-
- KUNIT_EXPECT_EQ(test, i, 1);
}
static void list_test_list_cut_before(struct kunit *test)
@@ -439,13 +436,10 @@ static void list_test_list_cut_before(struct kunit *test)
KUNIT_EXPECT_EQ(test, i, 1);
- i = 0;
list_for_each(cur, &list1) {
KUNIT_EXPECT_PTR_EQ(test, cur, &entries[i]);
i++;
}
-
- KUNIT_EXPECT_EQ(test, i, 2);
}
static void list_test_list_splice(struct kunit *test)
diff --git a/lib/test_bits.c b/lib/test_bits.c
index 01313980f175..c7b38d91e1f1 100644
--- a/lib/test_bits.c
+++ b/lib/test_bits.c
@@ -39,6 +39,36 @@ static void genmask_ull_test(struct kunit *test)
#endif
}
+static void genmask_u128_test(struct kunit *test)
+{
+#ifdef CONFIG_ARCH_SUPPORTS_INT128
+ /* Below 64 bit masks */
+ KUNIT_EXPECT_EQ(test, 0x0000000000000001ull, GENMASK_U128(0, 0));
+ KUNIT_EXPECT_EQ(test, 0x0000000000000003ull, GENMASK_U128(1, 0));
+ KUNIT_EXPECT_EQ(test, 0x0000000000000006ull, GENMASK_U128(2, 1));
+ KUNIT_EXPECT_EQ(test, 0x00000000ffffffffull, GENMASK_U128(31, 0));
+ KUNIT_EXPECT_EQ(test, 0x000000ffffe00000ull, GENMASK_U128(39, 21));
+ KUNIT_EXPECT_EQ(test, 0xffffffffffffffffull, GENMASK_U128(63, 0));
+
+ /* Above 64 bit masks - only 64 bit portion can be validated once */
+ KUNIT_EXPECT_EQ(test, 0xffffffffffffffffull, GENMASK_U128(64, 0) >> 1);
+ KUNIT_EXPECT_EQ(test, 0x00000000ffffffffull, GENMASK_U128(81, 50) >> 50);
+ KUNIT_EXPECT_EQ(test, 0x0000000000ffffffull, GENMASK_U128(87, 64) >> 64);
+ KUNIT_EXPECT_EQ(test, 0x0000000000ff0000ull, GENMASK_U128(87, 80) >> 64);
+
+ KUNIT_EXPECT_EQ(test, 0xffffffffffffffffull, GENMASK_U128(127, 0) >> 64);
+ KUNIT_EXPECT_EQ(test, 0xffffffffffffffffull, (u64)GENMASK_U128(127, 0));
+ KUNIT_EXPECT_EQ(test, 0x0000000000000003ull, GENMASK_U128(127, 126) >> 126);
+ KUNIT_EXPECT_EQ(test, 0x0000000000000001ull, GENMASK_U128(127, 127) >> 127);
+#ifdef TEST_GENMASK_FAILURES
+ /* these should fail compilation */
+ GENMASK_U128(0, 1);
+ GENMASK_U128(0, 10);
+ GENMASK_U128(9, 10);
+#endif /* TEST_GENMASK_FAILURES */
+#endif /* CONFIG_ARCH_SUPPORTS_INT128 */
+}
+
static void genmask_input_check_test(struct kunit *test)
{
unsigned int x, y;
@@ -56,12 +86,16 @@ static void genmask_input_check_test(struct kunit *test)
/* Valid input */
KUNIT_EXPECT_EQ(test, 0, GENMASK_INPUT_CHECK(1, 1));
KUNIT_EXPECT_EQ(test, 0, GENMASK_INPUT_CHECK(39, 21));
+ KUNIT_EXPECT_EQ(test, 0, GENMASK_INPUT_CHECK(100, 80));
+ KUNIT_EXPECT_EQ(test, 0, GENMASK_INPUT_CHECK(110, 65));
+ KUNIT_EXPECT_EQ(test, 0, GENMASK_INPUT_CHECK(127, 0));
}
static struct kunit_case bits_test_cases[] = {
KUNIT_CASE(genmask_test),
KUNIT_CASE(genmask_ull_test),
+ KUNIT_CASE(genmask_u128_test),
KUNIT_CASE(genmask_input_check_test),
{}
};
diff --git a/mm/Kconfig b/mm/Kconfig
index 09aebca1cae3..4c9f5ea13271 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -595,6 +595,7 @@ config ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE
config SPLIT_PTE_PTLOCKS
def_bool y
depends on MMU
+ depends on SMP
depends on NR_CPUS >= 4
depends on !ARM || CPU_CACHE_VIPT
depends on !PARISC || PA20
diff --git a/mm/damon/Kconfig b/mm/damon/Kconfig
index fecb8172410c..35b72f88983a 100644
--- a/mm/damon/Kconfig
+++ b/mm/damon/Kconfig
@@ -9,7 +9,7 @@ config DAMON
access frequency of each memory region. The information can be useful
for performance-centric DRAM level memory management.
- See https://damonitor.github.io/doc/html/latest-damon/index.html for
+ See https://www.kernel.org/doc/html/latest/mm/damon/index.html for
more information.
config DAMON_KUNIT_TEST
diff --git a/mm/filemap.c b/mm/filemap.c
index bbaed3dd5049..36d22968be9a 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2196,6 +2196,10 @@ unsigned filemap_get_folios_contig(struct address_space *mapping,
if (xa_is_value(folio))
goto update_start;
+ /* If we landed in the middle of a THP, continue at its end. */
+ if (xa_is_sibling(folio))
+ goto update_start;
+
if (!folio_try_get(folio))
goto retry;
diff --git a/mm/gup.c b/mm/gup.c
index 8232c8c9c372..a82890b46a36 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -3700,6 +3700,7 @@ long memfd_pin_folios(struct file *memfd, loff_t start, loff_t end,
ret = PTR_ERR(folio);
if (ret != -EEXIST)
goto err;
+ folio = NULL;
}
}
}
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 0580ac9e47b9..3ca89e0279a7 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -4115,7 +4115,6 @@ out:
static const struct file_operations split_huge_pages_fops = {
.owner = THIS_MODULE,
.write = split_huge_pages_write,
- .llseek = no_llseek,
};
static int __init split_huge_pages_debugfs(void)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index def84d8bcf2d..190fa05635f4 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2390,6 +2390,23 @@ struct folio *alloc_buddy_hugetlb_folio_with_mpol(struct hstate *h,
return folio;
}
+struct folio *alloc_hugetlb_folio_reserve(struct hstate *h, int preferred_nid,
+ nodemask_t *nmask, gfp_t gfp_mask)
+{
+ struct folio *folio;
+
+ spin_lock_irq(&hugetlb_lock);
+ folio = dequeue_hugetlb_folio_nodemask(h, gfp_mask, preferred_nid,
+ nmask);
+ if (folio) {
+ VM_BUG_ON(!h->resv_huge_pages);
+ h->resv_huge_pages--;
+ }
+
+ spin_unlock_irq(&hugetlb_lock);
+ return folio;
+}
+
/* folio migration callback function */
struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
nodemask_t *nmask, gfp_t gfp_mask, bool allow_alloc_fallback)
diff --git a/mm/kfence/report.c b/mm/kfence/report.c
index 451991a3a8f2..6370c5207d1a 100644
--- a/mm/kfence/report.c
+++ b/mm/kfence/report.c
@@ -109,7 +109,7 @@ static void kfence_print_stack(struct seq_file *seq, const struct kfence_metadat
const struct kfence_track *track = show_alloc ? &meta->alloc_track : &meta->free_track;
u64 ts_sec = track->ts_nsec;
unsigned long rem_nsec = do_div(ts_sec, NSEC_PER_SEC);
- u64 interval_nsec = local_clock() - meta->alloc_track.ts_nsec;
+ u64 interval_nsec = local_clock() - track->ts_nsec;
unsigned long rem_interval_nsec = do_div(interval_nsec, NSEC_PER_SEC);
/* Timestamp matches printk timestamp format. */
diff --git a/mm/memfd.c b/mm/memfd.c
index e7b7c5294d59..c17c3ea701a1 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -79,23 +79,25 @@ struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx)
* alloc from. Also, the folio will be pinned for an indefinite
* amount of time, so it is not expected to be migrated away.
*/
- gfp_mask = htlb_alloc_mask(hstate_file(memfd));
+ struct hstate *h = hstate_file(memfd);
+
+ gfp_mask = htlb_alloc_mask(h);
gfp_mask &= ~(__GFP_HIGHMEM | __GFP_MOVABLE);
+ idx >>= huge_page_order(h);
- folio = alloc_hugetlb_folio_nodemask(hstate_file(memfd),
- numa_node_id(),
- NULL,
- gfp_mask,
- false);
- if (folio && folio_try_get(folio)) {
+ folio = alloc_hugetlb_folio_reserve(h,
+ numa_node_id(),
+ NULL,
+ gfp_mask);
+ if (folio) {
err = hugetlb_add_to_page_cache(folio,
memfd->f_mapping,
idx);
if (err) {
folio_put(folio);
- free_huge_folio(folio);
return ERR_PTR(err);
}
+ folio_unlock(folio);
return folio;
}
return ERR_PTR(-ENOMEM);
diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
index 9842acebd05e..fc14fe53e9b7 100644
--- a/mm/memory-tiers.c
+++ b/mm/memory-tiers.c
@@ -768,10 +768,10 @@ int mt_set_default_dram_perf(int nid, struct access_coordinate *perf,
pr_info(
"memory-tiers: the performance of DRAM node %d mismatches that of the reference\n"
"DRAM node %d.\n", nid, default_dram_perf_ref_nid);
- pr_info(" performance of reference DRAM node %d:\n",
- default_dram_perf_ref_nid);
+ pr_info(" performance of reference DRAM node %d from %s:\n",
+ default_dram_perf_ref_nid, default_dram_perf_ref_source);
dump_hmem_attrs(&default_dram_perf, " ");
- pr_info(" performance of DRAM node %d:\n", nid);
+ pr_info(" performance of DRAM node %d from %s:\n", nid, source);
dump_hmem_attrs(perf, " ");
pr_info(
" disable default DRAM node performance based abstract distance algorithm.\n");
diff --git a/mm/migrate.c b/mm/migrate.c
index dfdb3a136bf8..df91248755e4 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1196,7 +1196,7 @@ static int migrate_folio_unmap(new_folio_t get_new_folio,
int rc = -EAGAIN;
int old_page_state = 0;
struct anon_vma *anon_vma = NULL;
- bool is_lru = !__folio_test_movable(src);
+ bool is_lru = data_race(!__folio_test_movable(src));
bool locked = false;
bool dst_locked = false;
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 3c8b78d9c4d1..d1b5705dc0c6 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -1254,7 +1254,7 @@ static int ceph_dns_resolve_name(const char *name, size_t namelen,
colon_p = memchr(name, ':', namelen);
if (delim_p && colon_p)
- end = delim_p < colon_p ? delim_p : colon_p;
+ end = min(delim_p, colon_p);
else if (!delim_p && colon_p)
end = colon_p;
else {
diff --git a/net/mac80211/rc80211_minstrel_ht_debugfs.c b/net/mac80211/rc80211_minstrel_ht_debugfs.c
index 25b8a67a63a4..85149c774505 100644
--- a/net/mac80211/rc80211_minstrel_ht_debugfs.c
+++ b/net/mac80211/rc80211_minstrel_ht_debugfs.c
@@ -187,7 +187,6 @@ static const struct file_operations minstrel_ht_stat_fops = {
.open = minstrel_ht_stats_open,
.read = minstrel_stats_read,
.release = minstrel_stats_release,
- .llseek = no_llseek,
};
static char *
@@ -323,7 +322,6 @@ static const struct file_operations minstrel_ht_stat_csv_fops = {
.open = minstrel_ht_stats_csv_open,
.read = minstrel_stats_read,
.release = minstrel_stats_release,
- .llseek = no_llseek,
};
void
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index 13a5126bc36e..7d3e82e4c2fc 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -1394,7 +1394,6 @@ static const struct file_operations rfkill_fops = {
.release = rfkill_fop_release,
.unlocked_ioctl = rfkill_fop_ioctl,
.compat_ioctl = compat_ptr_ioctl,
- .llseek = no_llseek,
};
#define RFKILL_NAME "rfkill"
diff --git a/net/socket.c b/net/socket.c
index 7b046dd3e9a7..601ad74930ef 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -153,7 +153,6 @@ static void sock_show_fdinfo(struct seq_file *m, struct file *f)
static const struct file_operations socket_file_ops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.read_iter = sock_read_iter,
.write_iter = sock_write_iter,
.poll = sock_poll,
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 4f31e73dc34d..1bd3e531b0e0 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1592,7 +1592,6 @@ static int cache_release_procfs(struct inode *inode, struct file *filp)
}
static const struct proc_ops cache_channel_proc_ops = {
- .proc_lseek = no_llseek,
.proc_read = cache_read_procfs,
.proc_write = cache_write_procfs,
.proc_poll = cache_poll_procfs,
@@ -1658,7 +1657,6 @@ static const struct proc_ops cache_flush_proc_ops = {
.proc_read = read_flush_procfs,
.proc_write = write_flush_procfs,
.proc_release = release_flush_procfs,
- .proc_lseek = no_llseek,
};
static void remove_cache_proc_entries(struct cache_detail *cd)
@@ -1811,7 +1809,6 @@ static int cache_release_pipefs(struct inode *inode, struct file *filp)
const struct file_operations cache_file_operations_pipefs = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.read = cache_read_pipefs,
.write = cache_write_pipefs,
.poll = cache_poll_pipefs,
@@ -1877,7 +1874,6 @@ const struct file_operations cache_flush_operations_pipefs = {
.read = read_flush_pipefs,
.write = write_flush_pipefs,
.release = release_flush_pipefs,
- .llseek = no_llseek,
};
int sunrpc_cache_register_pipefs(struct dentry *parent,
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 910a5d850d04..7ce3721c06ca 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -385,7 +385,6 @@ rpc_pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
static const struct file_operations rpc_pipe_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.read = rpc_pipe_read,
.write = rpc_pipe_write,
.poll = rpc_pipe_poll,
diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c
index b382c696c877..59eefe2fed10 100644
--- a/samples/vfio-mdev/mtty.c
+++ b/samples/vfio-mdev/mtty.c
@@ -927,7 +927,6 @@ static const struct file_operations mtty_save_fops = {
.unlocked_ioctl = mtty_precopy_ioctl,
.compat_ioctl = compat_ptr_ioctl,
.release = mtty_release_migf,
- .llseek = no_llseek,
};
static void mtty_save_state(struct mdev_state *mdev_state)
@@ -1082,7 +1081,6 @@ static const struct file_operations mtty_resume_fops = {
.owner = THIS_MODULE,
.write = mtty_resume_write,
.release = mtty_release_migf,
- .llseek = no_llseek,
};
static struct mtty_migration_file *
diff --git a/scripts/Makefile.modinst b/scripts/Makefile.modinst
index 4d81ed9af294..d97720943189 100644
--- a/scripts/Makefile.modinst
+++ b/scripts/Makefile.modinst
@@ -53,9 +53,11 @@ $(foreach x, % :, $(if $(findstring $x, $(dst)), \
$(error module installation path cannot contain '$x')))
suffix-y :=
+ifdef CONFIG_MODULE_COMPRESS_ALL
suffix-$(CONFIG_MODULE_COMPRESS_GZIP) := .gz
suffix-$(CONFIG_MODULE_COMPRESS_XZ) := .xz
suffix-$(CONFIG_MODULE_COMPRESS_ZSTD) := .zst
+endif
modules := $(patsubst $(extmod_prefix)%.o, $(dst)/%.ko$(suffix-y), $(modules))
install-$(CONFIG_MODULES) += $(modules)
diff --git a/scripts/coccinelle/api/stream_open.cocci b/scripts/coccinelle/api/stream_open.cocci
index df00d6619b06..50ab60c81f13 100644
--- a/scripts/coccinelle/api/stream_open.cocci
+++ b/scripts/coccinelle/api/stream_open.cocci
@@ -131,7 +131,6 @@ identifier llseek_f;
identifier fops0.fops;
@@
struct file_operations fops = {
- .llseek = no_llseek,
};
@ has_noop_llseek @
diff --git a/scripts/coccinelle/api/string_choices.cocci b/scripts/coccinelle/api/string_choices.cocci
index 5e729f187f22..375045086912 100644
--- a/scripts/coccinelle/api/string_choices.cocci
+++ b/scripts/coccinelle/api/string_choices.cocci
@@ -14,23 +14,18 @@ expression E;
- ((E == 1) ? "" : "s")
+ str_plural(E)
|
-- ((E != 1) ? "s" : "")
-+ str_plural(E)
-|
- ((E > 1) ? "s" : "")
+ str_plural(E)
)
-@str_plural_r depends on !patch exists@
+@str_plural_r depends on !patch@
expression E;
position P;
@@
(
-* ((E@P == 1) ? "" : "s")
-|
-* ((E@P != 1) ? "s" : "")
+* (E@P == 1) ? "" : "s"
|
-* ((E@P > 1) ? "s" : "")
+* (E@P > 1) ? "s" : ""
)
@script:python depends on report@
@@ -40,21 +35,17 @@ e << str_plural_r.E;
coccilib.report.print_report(p[0], "opportunity for str_plural(%s)" % e)
-@str_up_down depends on patch@
+@str_up_down depends on patch disable neg_if_exp@
expression E;
@@
-(
- ((E) ? "up" : "down")
+ str_up_down(E)
-)
-@str_up_down_r depends on !patch exists@
+@str_up_down_r depends on !patch disable neg_if_exp@
expression E;
position P;
@@
-(
-* ((E@P) ? "up" : "down")
-)
+* E@P ? "up" : "down"
@script:python depends on report@
p << str_up_down_r.P;
@@ -63,21 +54,17 @@ e << str_up_down_r.E;
coccilib.report.print_report(p[0], "opportunity for str_up_down(%s)" % e)
-@str_down_up depends on patch@
+@str_down_up depends on patch disable neg_if_exp@
expression E;
@@
-(
- ((E) ? "down" : "up")
+ str_down_up(E)
-)
-@str_down_up_r depends on !patch exists@
+@str_down_up_r depends on !patch disable neg_if_exp@
expression E;
position P;
@@
-(
-* ((E@P) ? "down" : "up")
-)
+* E@P ? "down" : "up"
@script:python depends on report@
p << str_down_up_r.P;
@@ -85,3 +72,231 @@ e << str_down_up_r.E;
@@
coccilib.report.print_report(p[0], "opportunity for str_down_up(%s)" % e)
+
+@str_true_false depends on patch disable neg_if_exp@
+expression E;
+@@
+- ((E) ? "true" : "false")
++ str_true_false(E)
+
+@str_true_false_r depends on !patch disable neg_if_exp@
+expression E;
+position P;
+@@
+* E@P ? "true" : "false"
+
+@script:python depends on report@
+p << str_true_false_r.P;
+e << str_true_false_r.E;
+@@
+
+coccilib.report.print_report(p[0], "opportunity for str_true_false(%s)" % e)
+
+@str_false_true depends on patch disable neg_if_exp@
+expression E;
+@@
+- ((E) ? "false" : "true")
++ str_false_true(E)
+
+@str_false_true_r depends on !patch disable neg_if_exp@
+expression E;
+position P;
+@@
+* E@P ? "false" : "true"
+
+@script:python depends on report@
+p << str_false_true_r.P;
+e << str_false_true_r.E;
+@@
+
+coccilib.report.print_report(p[0], "opportunity for str_false_true(%s)" % e)
+
+@str_hi_lo depends on patch disable neg_if_exp@
+expression E;
+@@
+- ((E) ? "hi" : "lo")
++ str_hi_lo(E)
+
+@str_hi_lo_r depends on !patch disable neg_if_exp@
+expression E;
+position P;
+@@
+* E@P ? "hi" : "lo"
+
+@script:python depends on report@
+p << str_hi_lo_r.P;
+e << str_hi_lo_r.E;
+@@
+
+coccilib.report.print_report(p[0], "opportunity for str_hi_lo(%s)" % e)
+
+@str_high_low depends on patch disable neg_if_exp@
+expression E;
+@@
+- ((E) ? "high" : "low")
++ str_high_low(E)
+
+@str_high_low_r depends on !patch disable neg_if_exp@
+expression E;
+position P;
+@@
+* E@P ? "high" : "low"
+
+@script:python depends on report@
+p << str_high_low_r.P;
+e << str_high_low_r.E;
+@@
+
+coccilib.report.print_report(p[0], "opportunity for str_high_low(%s)" % e)
+
+@str_lo_hi depends on patch disable neg_if_exp@
+expression E;
+@@
+- ((E) ? "lo" : "hi")
++ str_lo_hi(E)
+
+@str_lo_hi_r depends on !patch disable neg_if_exp@
+expression E;
+position P;
+@@
+* E@P ? "lo" : "hi"
+
+@script:python depends on report@
+p << str_lo_hi_r.P;
+e << str_lo_hi_r.E;
+@@
+
+coccilib.report.print_report(p[0], "opportunity for str_lo_hi(%s)" % e)
+
+@str_low_high depends on patch disable neg_if_exp@
+expression E;
+@@
+- ((E) ? "low" : "high")
++ str_low_high(E)
+
+@str_low_high_r depends on !patch disable neg_if_exp@
+expression E;
+position P;
+@@
+* E@P ? "low" : "high"
+
+@script:python depends on report@
+p << str_low_high_r.P;
+e << str_low_high_r.E;
+@@
+
+coccilib.report.print_report(p[0], "opportunity for str_low_high(%s)" % e)
+
+@str_enable_disable depends on patch@
+expression E;
+@@
+- ((E) ? "enable" : "disable")
++ str_enable_disable(E)
+
+@str_enable_disable_r depends on !patch@
+expression E;
+position P;
+@@
+* E@P ? "enable" : "disable"
+
+@script:python depends on report@
+p << str_enable_disable_r.P;
+e << str_enable_disable_r.E;
+@@
+
+coccilib.report.print_report(p[0], "opportunity for str_enable_disable(%s)" % e)
+
+@str_enabled_disabled depends on patch@
+expression E;
+@@
+- ((E) ? "enabled" : "disabled")
++ str_enabled_disabled(E)
+
+@str_enabled_disabled_r depends on !patch@
+expression E;
+position P;
+@@
+* E@P ? "enabled" : "disabled"
+
+@script:python depends on report@
+p << str_enabled_disabled_r.P;
+e << str_enabled_disabled_r.E;
+@@
+
+coccilib.report.print_report(p[0], "opportunity for str_enabled_disabled(%s)" % e)
+
+@str_read_write depends on patch disable neg_if_exp@
+expression E;
+@@
+- ((E) ? "read" : "write")
++ str_read_write(E)
+
+@str_read_write_r depends on !patch disable neg_if_exp@
+expression E;
+position P;
+@@
+* E@P ? "read" : "write"
+
+@script:python depends on report@
+p << str_read_write_r.P;
+e << str_read_write_r.E;
+@@
+
+coccilib.report.print_report(p[0], "opportunity for str_read_write(%s)" % e)
+
+@str_write_read depends on patch disable neg_if_exp@
+expression E;
+@@
+- ((E) ? "write" : "read")
++ str_write_read(E)
+
+@str_write_read_r depends on !patch disable neg_if_exp@
+expression E;
+position P;
+@@
+* E@P ? "write" : "read"
+
+@script:python depends on report@
+p << str_write_read_r.P;
+e << str_write_read_r.E;
+@@
+
+coccilib.report.print_report(p[0], "opportunity for str_write_read(%s)" % e)
+
+@str_on_off depends on patch@
+expression E;
+@@
+- ((E) ? "on" : "off")
++ str_on_off(E)
+
+@str_on_off_r depends on !patch@
+expression E;
+position P;
+@@
+* E@P ? "on" : "off"
+
+@script:python depends on report@
+p << str_on_off_r.P;
+e << str_on_off_r.E;
+@@
+
+coccilib.report.print_report(p[0], "opportunity for str_on_off(%s)" % e)
+
+@str_yes_no depends on patch@
+expression E;
+@@
+- ((E) ? "yes" : "no")
++ str_yes_no(E)
+
+@str_yes_no_r depends on !patch@
+expression E;
+position P;
+@@
+* E@P ? "yes" : "no"
+
+@script:python depends on report@
+p << str_yes_no_r.P;
+e << str_yes_no_r.E;
+@@
+
+coccilib.report.print_report(p[0], "opportunity for str_yes_no(%s)" % e)
diff --git a/security/tomoyo/Kconfig b/security/tomoyo/Kconfig
index 1e0dd1a6d0b0..90eccc6cd464 100644
--- a/security/tomoyo/Kconfig
+++ b/security/tomoyo/Kconfig
@@ -13,6 +13,21 @@ config SECURITY_TOMOYO
found at <https://tomoyo.sourceforge.net/>.
If you are unsure how to answer this question, answer N.
+config SECURITY_TOMOYO_LKM
+ bool "Cut out most of TOMOYO's code to a loadable kernel module"
+ default n
+ depends on SECURITY_TOMOYO
+ depends on MODULES
+ help
+ Say Y here if you want to include TOMOYO without bloating
+ vmlinux file. If you say Y, most of TOMOYO code is cut out to
+ a loadable kernel module named tomoyo.ko . This option will be
+ useful for kernels built by Linux distributors where TOMOYO is
+ included but TOMOYO is not enabled by default. Please be sure
+ to explicitly load tomoyo.ko if you want to activate TOMOYO
+ without calling userspace policy loader, for tomoyo.ko is
+ loaded immediately before calling userspace policy loader.
+
config SECURITY_TOMOYO_MAX_ACCEPT_ENTRY
int "Default maximal count for learning mode"
default 2048
diff --git a/security/tomoyo/Makefile b/security/tomoyo/Makefile
index 55c67b9846a9..287a7d16fa15 100644
--- a/security/tomoyo/Makefile
+++ b/security/tomoyo/Makefile
@@ -1,5 +1,11 @@
# SPDX-License-Identifier: GPL-2.0
-obj-y = audit.o common.o condition.o domain.o environ.o file.o gc.o group.o load_policy.o memory.o mount.o network.o realpath.o securityfs_if.o tomoyo.o util.o
+tomoyo-objs := audit.o common.o condition.o domain.o environ.o file.o gc.o group.o memory.o mount.o network.o proxy.o realpath.o securityfs_if.o util.o
+obj-y += init.o load_policy.o
+ifdef CONFIG_SECURITY_TOMOYO_LKM
+obj-m += tomoyo.o
+else
+obj-y += tomoyo.o
+endif
targets += builtin-policy.h
diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c
index 5c7b059a332a..c0ef014f8009 100644
--- a/security/tomoyo/common.c
+++ b/security/tomoyo/common.c
@@ -998,8 +998,13 @@ static bool tomoyo_select_domain(struct tomoyo_io_buffer *head,
p = find_task_by_pid_ns(pid, &init_pid_ns);
else
p = find_task_by_vpid(pid);
- if (p)
+ if (p) {
domain = tomoyo_task(p)->domain_info;
+#ifdef CONFIG_SECURITY_TOMOYO_LKM
+ if (!domain)
+ domain = &tomoyo_kernel_domain;
+#endif
+ }
rcu_read_unlock();
} else if (!strncmp(data, "domain=", 7)) {
if (tomoyo_domain_def(data + 7))
@@ -1710,8 +1715,13 @@ static void tomoyo_read_pid(struct tomoyo_io_buffer *head)
p = find_task_by_pid_ns(pid, &init_pid_ns);
else
p = find_task_by_vpid(pid);
- if (p)
+ if (p) {
domain = tomoyo_task(p)->domain_info;
+#ifdef CONFIG_SECURITY_TOMOYO_LKM
+ if (!domain)
+ domain = &tomoyo_kernel_domain;
+#endif
+ }
rcu_read_unlock();
if (!domain)
return;
diff --git a/security/tomoyo/common.h b/security/tomoyo/common.h
index 0e8e2e959aef..4f6c52a9f478 100644
--- a/security/tomoyo/common.h
+++ b/security/tomoyo/common.h
@@ -978,6 +978,7 @@ int tomoyo_get_mode(const struct tomoyo_policy_namespace *ns, const u8 profile,
int tomoyo_init_request_info(struct tomoyo_request_info *r,
struct tomoyo_domain_info *domain,
const u8 index);
+int __init tomoyo_interface_init(void);
int tomoyo_mkdev_perm(const u8 operation, const struct path *path,
const unsigned int mode, unsigned int dev);
int tomoyo_mount_permission(const char *dev_name, const struct path *path,
@@ -1214,10 +1215,14 @@ static inline void tomoyo_put_group(struct tomoyo_group *group)
*
* Returns pointer to "struct tomoyo_task" for specified thread.
*/
+#ifdef CONFIG_SECURITY_TOMOYO_LKM
+extern struct tomoyo_task *tomoyo_task(struct task_struct *task);
+#else
static inline struct tomoyo_task *tomoyo_task(struct task_struct *task)
{
return task->security + tomoyo_blob_sizes.lbs_task;
}
+#endif
/**
* tomoyo_same_name_union - Check for duplicated "struct tomoyo_name_union" entry.
@@ -1284,4 +1289,71 @@ static inline struct tomoyo_policy_namespace *tomoyo_current_namespace(void)
pos = srcu_dereference((head)->next, &tomoyo_ss); \
for ( ; pos != (head); pos = srcu_dereference(pos->next, &tomoyo_ss))
+#ifdef CONFIG_SECURITY_TOMOYO_LKM
+
+#define LSM_HOOK(RET, DEFAULT, NAME, ...) typedef RET (NAME##_t)(__VA_ARGS__);
+#include <linux/lsm_hook_defs.h>
+#undef LSM_HOOK
+
+struct tomoyo_hooks {
+ cred_prepare_t *cred_prepare;
+ bprm_committed_creds_t *bprm_committed_creds;
+ task_alloc_t *task_alloc;
+ task_free_t *task_free;
+ bprm_check_security_t *bprm_check_security;
+ file_fcntl_t *file_fcntl;
+ file_open_t *file_open;
+ file_truncate_t *file_truncate;
+ path_truncate_t *path_truncate;
+ path_unlink_t *path_unlink;
+ path_mkdir_t *path_mkdir;
+ path_rmdir_t *path_rmdir;
+ path_symlink_t *path_symlink;
+ path_mknod_t *path_mknod;
+ path_link_t *path_link;
+ path_rename_t *path_rename;
+ inode_getattr_t *inode_getattr;
+ file_ioctl_t *file_ioctl;
+ file_ioctl_compat_t *file_ioctl_compat;
+ path_chmod_t *path_chmod;
+ path_chown_t *path_chown;
+ path_chroot_t *path_chroot;
+ sb_mount_t *sb_mount;
+ sb_umount_t *sb_umount;
+ sb_pivotroot_t *sb_pivotroot;
+ socket_bind_t *socket_bind;
+ socket_connect_t *socket_connect;
+ socket_listen_t *socket_listen;
+ socket_sendmsg_t *socket_sendmsg;
+};
+
+extern void tomoyo_register_hooks(const struct tomoyo_hooks *tomoyo_hooks);
+
+struct tomoyo_operations {
+ void (*check_profile)(void);
+ int enabled;
+};
+
+extern struct tomoyo_operations tomoyo_ops;
+
+/*
+ * Temporary hack: functions needed by tomoyo.ko . This will be removed
+ * after all functions are marked as EXPORT_STMBOL_GPL().
+ */
+struct tomoyo_tmp_exports {
+ struct task_struct * (*find_task_by_vpid)(pid_t nr);
+ struct task_struct * (*find_task_by_pid_ns)(pid_t nr, struct pid_namespace *ns);
+ void (*put_filesystem)(struct file_system_type *fs);
+ struct file * (*get_mm_exe_file)(struct mm_struct *mm);
+ char * (*d_absolute_path)(const struct path *path, char *buf, int buflen);
+};
+extern const struct tomoyo_tmp_exports tomoyo_tmp_exports;
+#define find_task_by_vpid tomoyo_tmp_exports.find_task_by_vpid
+#define find_task_by_pid_ns tomoyo_tmp_exports.find_task_by_pid_ns
+#define put_filesystem tomoyo_tmp_exports.put_filesystem
+#define get_mm_exe_file tomoyo_tmp_exports.get_mm_exe_file
+#define d_absolute_path tomoyo_tmp_exports.d_absolute_path
+
+#endif /* defined(CONFIG_SECURITY_TOMOYO_LKM) */
+
#endif /* !defined(_SECURITY_TOMOYO_COMMON_H) */
diff --git a/security/tomoyo/domain.c b/security/tomoyo/domain.c
index 90b53500a236..aed9e3ef2c9e 100644
--- a/security/tomoyo/domain.c
+++ b/security/tomoyo/domain.c
@@ -723,10 +723,13 @@ int tomoyo_find_next_domain(struct linux_binprm *bprm)
ee->r.obj = &ee->obj;
ee->obj.path1 = bprm->file->f_path;
/* Get symlink's pathname of program. */
- retval = -ENOENT;
exename.name = tomoyo_realpath_nofollow(original_name);
- if (!exename.name)
- goto out;
+ if (!exename.name) {
+ /* Fallback to realpath if symlink's pathname does not exist. */
+ exename.name = tomoyo_realpath_from_path(&bprm->file->f_path);
+ if (!exename.name)
+ goto out;
+ }
tomoyo_fill_path_info(&exename);
retry:
/* Check 'aggregator' directive. */
diff --git a/security/tomoyo/gc.c b/security/tomoyo/gc.c
index 026e29ea3796..6eccca150839 100644
--- a/security/tomoyo/gc.c
+++ b/security/tomoyo/gc.c
@@ -9,6 +9,9 @@
#include <linux/kthread.h>
#include <linux/slab.h>
+/* Lock for GC. */
+DEFINE_SRCU(tomoyo_ss);
+
/**
* tomoyo_memory_free - Free memory for elements.
*
diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/hooks.h
index 04a92c3d65d4..58929bb71477 100644
--- a/security/tomoyo/tomoyo.c
+++ b/security/tomoyo/hooks.h
@@ -1,12 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * security/tomoyo/tomoyo.c
+ * security/tomoyo/hooks.h
*
* Copyright (C) 2005-2011 NTT DATA CORPORATION
*/
-#include <linux/lsm_hooks.h>
-#include <uapi/linux/lsm.h>
#include "common.h"
/**
@@ -18,10 +16,6 @@ struct tomoyo_domain_info *tomoyo_domain(void)
{
struct tomoyo_task *s = tomoyo_task(current);
- if (s->old_domain_info && !current->in_execve) {
- atomic_dec(&s->old_domain_info->users);
- s->old_domain_info = NULL;
- }
return s->domain_info;
}
@@ -62,26 +56,6 @@ static void tomoyo_bprm_committed_creds(const struct linux_binprm *bprm)
s->old_domain_info = NULL;
}
-#ifndef CONFIG_SECURITY_TOMOYO_OMIT_USERSPACE_LOADER
-/**
- * tomoyo_bprm_creds_for_exec - Target for security_bprm_creds_for_exec().
- *
- * @bprm: Pointer to "struct linux_binprm".
- *
- * Returns 0.
- */
-static int tomoyo_bprm_creds_for_exec(struct linux_binprm *bprm)
-{
- /*
- * Load policy if /sbin/tomoyo-init exists and /sbin/init is requested
- * for the first time.
- */
- if (!tomoyo_policy_loaded)
- tomoyo_load_policy(bprm->filename);
- return 0;
-}
-#endif
-
/**
* tomoyo_bprm_check_security - Target for security_bprm_check().
*
@@ -501,10 +475,6 @@ static int tomoyo_socket_sendmsg(struct socket *sock, struct msghdr *msg,
return tomoyo_socket_sendmsg_permission(sock, msg, size);
}
-struct lsm_blob_sizes tomoyo_blob_sizes __ro_after_init = {
- .lbs_task = sizeof(struct tomoyo_task),
-};
-
/**
* tomoyo_task_alloc - Target for security_task_alloc().
*
@@ -543,81 +513,3 @@ static void tomoyo_task_free(struct task_struct *task)
s->old_domain_info = NULL;
}
}
-
-static const struct lsm_id tomoyo_lsmid = {
- .name = "tomoyo",
- .id = LSM_ID_TOMOYO,
-};
-
-/*
- * tomoyo_security_ops is a "struct security_operations" which is used for
- * registering TOMOYO.
- */
-static struct security_hook_list tomoyo_hooks[] __ro_after_init = {
- LSM_HOOK_INIT(cred_prepare, tomoyo_cred_prepare),
- LSM_HOOK_INIT(bprm_committed_creds, tomoyo_bprm_committed_creds),
- LSM_HOOK_INIT(task_alloc, tomoyo_task_alloc),
- LSM_HOOK_INIT(task_free, tomoyo_task_free),
-#ifndef CONFIG_SECURITY_TOMOYO_OMIT_USERSPACE_LOADER
- LSM_HOOK_INIT(bprm_creds_for_exec, tomoyo_bprm_creds_for_exec),
-#endif
- LSM_HOOK_INIT(bprm_check_security, tomoyo_bprm_check_security),
- LSM_HOOK_INIT(file_fcntl, tomoyo_file_fcntl),
- LSM_HOOK_INIT(file_open, tomoyo_file_open),
- LSM_HOOK_INIT(file_truncate, tomoyo_file_truncate),
- LSM_HOOK_INIT(path_truncate, tomoyo_path_truncate),
- LSM_HOOK_INIT(path_unlink, tomoyo_path_unlink),
- LSM_HOOK_INIT(path_mkdir, tomoyo_path_mkdir),
- LSM_HOOK_INIT(path_rmdir, tomoyo_path_rmdir),
- LSM_HOOK_INIT(path_symlink, tomoyo_path_symlink),
- LSM_HOOK_INIT(path_mknod, tomoyo_path_mknod),
- LSM_HOOK_INIT(path_link, tomoyo_path_link),
- LSM_HOOK_INIT(path_rename, tomoyo_path_rename),
- LSM_HOOK_INIT(inode_getattr, tomoyo_inode_getattr),
- LSM_HOOK_INIT(file_ioctl, tomoyo_file_ioctl),
- LSM_HOOK_INIT(file_ioctl_compat, tomoyo_file_ioctl),
- LSM_HOOK_INIT(path_chmod, tomoyo_path_chmod),
- LSM_HOOK_INIT(path_chown, tomoyo_path_chown),
- LSM_HOOK_INIT(path_chroot, tomoyo_path_chroot),
- LSM_HOOK_INIT(sb_mount, tomoyo_sb_mount),
- LSM_HOOK_INIT(sb_umount, tomoyo_sb_umount),
- LSM_HOOK_INIT(sb_pivotroot, tomoyo_sb_pivotroot),
- LSM_HOOK_INIT(socket_bind, tomoyo_socket_bind),
- LSM_HOOK_INIT(socket_connect, tomoyo_socket_connect),
- LSM_HOOK_INIT(socket_listen, tomoyo_socket_listen),
- LSM_HOOK_INIT(socket_sendmsg, tomoyo_socket_sendmsg),
-};
-
-/* Lock for GC. */
-DEFINE_SRCU(tomoyo_ss);
-
-int tomoyo_enabled __ro_after_init = 1;
-
-/**
- * tomoyo_init - Register TOMOYO Linux as a LSM module.
- *
- * Returns 0.
- */
-static int __init tomoyo_init(void)
-{
- struct tomoyo_task *s = tomoyo_task(current);
-
- /* register ourselves with the security framework */
- security_add_hooks(tomoyo_hooks, ARRAY_SIZE(tomoyo_hooks),
- &tomoyo_lsmid);
- pr_info("TOMOYO Linux initialized\n");
- s->domain_info = &tomoyo_kernel_domain;
- atomic_inc(&tomoyo_kernel_domain.users);
- s->old_domain_info = NULL;
- tomoyo_mm_init();
-
- return 0;
-}
-
-DEFINE_LSM(tomoyo) = {
- .name = "tomoyo",
- .enabled = &tomoyo_enabled,
- .flags = LSM_FLAG_LEGACY_MAJOR,
- .blobs = &tomoyo_blob_sizes,
- .init = tomoyo_init,
-};
diff --git a/security/tomoyo/init.c b/security/tomoyo/init.c
new file mode 100644
index 000000000000..034e7db22d4e
--- /dev/null
+++ b/security/tomoyo/init.c
@@ -0,0 +1,366 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * security/tomoyo/init.c
+ *
+ * Copyright (C) 2005-2011 NTT DATA CORPORATION
+ */
+
+#include <linux/lsm_hooks.h>
+#include <uapi/linux/lsm.h>
+#include "common.h"
+
+#ifndef CONFIG_SECURITY_TOMOYO_LKM
+
+#include "hooks.h"
+
+#else
+
+#define DEFINE_STATIC_CALL_PROXY(NAME) \
+ static NAME##_t tomoyo_##NAME; \
+ DEFINE_STATIC_CALL_RET0(tomoyo_##NAME, tomoyo_##NAME);
+DEFINE_STATIC_CALL_PROXY(cred_prepare)
+DEFINE_STATIC_CALL_PROXY(bprm_committed_creds)
+DEFINE_STATIC_CALL_PROXY(bprm_check_security)
+DEFINE_STATIC_CALL_PROXY(inode_getattr)
+DEFINE_STATIC_CALL_PROXY(path_truncate)
+DEFINE_STATIC_CALL_PROXY(file_truncate)
+DEFINE_STATIC_CALL_PROXY(path_unlink)
+DEFINE_STATIC_CALL_PROXY(path_mkdir)
+DEFINE_STATIC_CALL_PROXY(path_rmdir)
+DEFINE_STATIC_CALL_PROXY(path_symlink)
+DEFINE_STATIC_CALL_PROXY(path_mknod)
+DEFINE_STATIC_CALL_PROXY(path_link)
+DEFINE_STATIC_CALL_PROXY(path_rename)
+DEFINE_STATIC_CALL_PROXY(file_fcntl)
+DEFINE_STATIC_CALL_PROXY(file_open)
+DEFINE_STATIC_CALL_PROXY(file_ioctl)
+DEFINE_STATIC_CALL_PROXY(path_chmod)
+DEFINE_STATIC_CALL_PROXY(path_chown)
+DEFINE_STATIC_CALL_PROXY(path_chroot)
+DEFINE_STATIC_CALL_PROXY(sb_mount)
+DEFINE_STATIC_CALL_PROXY(sb_umount)
+DEFINE_STATIC_CALL_PROXY(sb_pivotroot)
+DEFINE_STATIC_CALL_PROXY(socket_listen)
+DEFINE_STATIC_CALL_PROXY(socket_connect)
+DEFINE_STATIC_CALL_PROXY(socket_bind)
+DEFINE_STATIC_CALL_PROXY(socket_sendmsg)
+DEFINE_STATIC_CALL_PROXY(task_alloc)
+DEFINE_STATIC_CALL_PROXY(task_free)
+#undef DEFINE_STATIC_CALL_PROXY
+
+static int tomoyo_cred_prepare(struct cred *new, const struct cred *old, gfp_t gfp)
+{
+ return static_call(tomoyo_cred_prepare)(new, old, gfp);
+}
+
+static void tomoyo_bprm_committed_creds(const struct linux_binprm *bprm)
+{
+ static_call(tomoyo_bprm_committed_creds)(bprm);
+}
+
+static int tomoyo_bprm_check_security(struct linux_binprm *bprm)
+{
+ return static_call(tomoyo_bprm_check_security)(bprm);
+}
+
+static int tomoyo_inode_getattr(const struct path *path)
+{
+ return static_call(tomoyo_inode_getattr)(path);
+}
+
+static int tomoyo_path_truncate(const struct path *path)
+{
+ return static_call(tomoyo_path_truncate)(path);
+}
+
+static int tomoyo_file_truncate(struct file *file)
+{
+ return static_call(tomoyo_file_truncate)(file);
+}
+
+static int tomoyo_path_unlink(const struct path *parent, struct dentry *dentry)
+{
+ return static_call(tomoyo_path_unlink)(parent, dentry);
+}
+
+static int tomoyo_path_mkdir(const struct path *parent, struct dentry *dentry, umode_t mode)
+{
+ return static_call(tomoyo_path_mkdir)(parent, dentry, mode);
+}
+
+static int tomoyo_path_rmdir(const struct path *parent, struct dentry *dentry)
+{
+ return static_call(tomoyo_path_rmdir)(parent, dentry);
+}
+
+static int tomoyo_path_symlink(const struct path *parent, struct dentry *dentry,
+ const char *old_name)
+{
+ return static_call(tomoyo_path_symlink)(parent, dentry, old_name);
+}
+
+static int tomoyo_path_mknod(const struct path *parent, struct dentry *dentry,
+ umode_t mode, unsigned int dev)
+{
+ return static_call(tomoyo_path_mknod)(parent, dentry, mode, dev);
+}
+
+static int tomoyo_path_link(struct dentry *old_dentry, const struct path *new_dir,
+ struct dentry *new_dentry)
+{
+ return static_call(tomoyo_path_link)(old_dentry, new_dir, new_dentry);
+}
+
+static int tomoyo_path_rename(const struct path *old_parent, struct dentry *old_dentry,
+ const struct path *new_parent, struct dentry *new_dentry,
+ const unsigned int flags)
+{
+ return static_call(tomoyo_path_rename)(old_parent, old_dentry, new_parent, new_dentry, flags);
+}
+
+static int tomoyo_file_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ return static_call(tomoyo_file_fcntl)(file, cmd, arg);
+}
+
+static int tomoyo_file_open(struct file *f)
+{
+ return static_call(tomoyo_file_open)(f);
+}
+
+static int tomoyo_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ return static_call(tomoyo_file_ioctl)(file, cmd, arg);
+}
+
+static int tomoyo_path_chmod(const struct path *path, umode_t mode)
+{
+ return static_call(tomoyo_path_chmod)(path, mode);
+}
+
+static int tomoyo_path_chown(const struct path *path, kuid_t uid, kgid_t gid)
+{
+ return static_call(tomoyo_path_chown)(path, uid, gid);
+}
+
+static int tomoyo_path_chroot(const struct path *path)
+{
+ return static_call(tomoyo_path_chroot)(path);
+}
+
+static int tomoyo_sb_mount(const char *dev_name, const struct path *path,
+ const char *type, unsigned long flags, void *data)
+{
+ return static_call(tomoyo_sb_mount)(dev_name, path, type, flags, data);
+}
+
+static int tomoyo_sb_umount(struct vfsmount *mnt, int flags)
+{
+ return static_call(tomoyo_sb_umount)(mnt, flags);
+}
+
+static int tomoyo_sb_pivotroot(const struct path *old_path, const struct path *new_path)
+{
+ return static_call(tomoyo_sb_pivotroot)(old_path, new_path);
+}
+
+static int tomoyo_socket_listen(struct socket *sock, int backlog)
+{
+ return static_call(tomoyo_socket_listen)(sock, backlog);
+}
+
+static int tomoyo_socket_connect(struct socket *sock, struct sockaddr *addr, int addr_len)
+{
+ return static_call(tomoyo_socket_connect)(sock, addr, addr_len);
+}
+
+static int tomoyo_socket_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
+{
+ return static_call(tomoyo_socket_bind)(sock, addr, addr_len);
+}
+
+static int tomoyo_socket_sendmsg(struct socket *sock, struct msghdr *msg, int size)
+{
+ return static_call(tomoyo_socket_sendmsg)(sock, msg, size);
+}
+
+static int tomoyo_task_alloc(struct task_struct *task, unsigned long clone_flags)
+{
+ return static_call(tomoyo_task_alloc)(task, clone_flags);
+}
+
+static void tomoyo_task_free(struct task_struct *task)
+{
+ static_call(tomoyo_task_free)(task);
+}
+
+void tomoyo_register_hooks(const struct tomoyo_hooks *tomoyo_hooks)
+{
+ static void *registered;
+
+ if (cmpxchg(&registered, NULL, &registered))
+ panic("%s was called twice!\n", __func__);
+ static_call_update(tomoyo_task_free, tomoyo_hooks->task_free);
+ static_call_update(tomoyo_task_alloc, tomoyo_hooks->task_alloc);
+ static_call_update(tomoyo_cred_prepare, tomoyo_hooks->cred_prepare);
+ static_call_update(tomoyo_bprm_committed_creds, tomoyo_hooks->bprm_committed_creds);
+ static_call_update(tomoyo_bprm_check_security, tomoyo_hooks->bprm_check_security);
+ static_call_update(tomoyo_inode_getattr, tomoyo_hooks->inode_getattr);
+ static_call_update(tomoyo_path_truncate, tomoyo_hooks->path_truncate);
+ static_call_update(tomoyo_file_truncate, tomoyo_hooks->file_truncate);
+ static_call_update(tomoyo_path_unlink, tomoyo_hooks->path_unlink);
+ static_call_update(tomoyo_path_mkdir, tomoyo_hooks->path_mkdir);
+ static_call_update(tomoyo_path_rmdir, tomoyo_hooks->path_rmdir);
+ static_call_update(tomoyo_path_symlink, tomoyo_hooks->path_symlink);
+ static_call_update(tomoyo_path_mknod, tomoyo_hooks->path_mknod);
+ static_call_update(tomoyo_path_link, tomoyo_hooks->path_link);
+ static_call_update(tomoyo_path_rename, tomoyo_hooks->path_rename);
+ static_call_update(tomoyo_file_fcntl, tomoyo_hooks->file_fcntl);
+ static_call_update(tomoyo_file_open, tomoyo_hooks->file_open);
+ static_call_update(tomoyo_file_ioctl, tomoyo_hooks->file_ioctl);
+ static_call_update(tomoyo_path_chmod, tomoyo_hooks->path_chmod);
+ static_call_update(tomoyo_path_chown, tomoyo_hooks->path_chown);
+ static_call_update(tomoyo_path_chroot, tomoyo_hooks->path_chroot);
+ static_call_update(tomoyo_sb_mount, tomoyo_hooks->sb_mount);
+ static_call_update(tomoyo_sb_umount, tomoyo_hooks->sb_umount);
+ static_call_update(tomoyo_sb_pivotroot, tomoyo_hooks->sb_pivotroot);
+ static_call_update(tomoyo_socket_listen, tomoyo_hooks->socket_listen);
+ static_call_update(tomoyo_socket_connect, tomoyo_hooks->socket_connect);
+ static_call_update(tomoyo_socket_bind, tomoyo_hooks->socket_bind);
+ static_call_update(tomoyo_socket_sendmsg, tomoyo_hooks->socket_sendmsg);
+}
+EXPORT_SYMBOL_GPL(tomoyo_register_hooks);
+
+/*
+ * Temporary hack: functions needed by tomoyo.ko . This hack will be removed
+ * after all functions are marked as EXPORT_STMBOL_GPL().
+ */
+#undef find_task_by_vpid
+#undef find_task_by_pid_ns
+#undef put_filesystem
+#undef get_mm_exe_file
+#undef d_absolute_path
+const struct tomoyo_tmp_exports tomoyo_tmp_exports = {
+ .find_task_by_vpid = find_task_by_vpid,
+ .find_task_by_pid_ns = find_task_by_pid_ns,
+ .put_filesystem = put_filesystem,
+ .get_mm_exe_file = get_mm_exe_file,
+ .d_absolute_path = d_absolute_path,
+};
+EXPORT_SYMBOL_GPL(tomoyo_tmp_exports);
+
+#endif
+
+#ifndef CONFIG_SECURITY_TOMOYO_OMIT_USERSPACE_LOADER
+static int tomoyo_bprm_creds_for_exec(struct linux_binprm *bprm)
+{
+ /*
+ * Load policy if /sbin/tomoyo-init exists and /sbin/init is requested
+ * for the first time.
+ */
+ if (!tomoyo_policy_loaded)
+ tomoyo_load_policy(bprm->filename);
+ return 0;
+}
+#endif
+
+struct lsm_blob_sizes tomoyo_blob_sizes __ro_after_init = {
+ .lbs_task = sizeof(struct tomoyo_task),
+};
+
+static const struct lsm_id tomoyo_lsmid = {
+ .name = "tomoyo",
+ .id = LSM_ID_TOMOYO,
+};
+
+/* tomoyo_hooks is used for registering TOMOYO. */
+static struct security_hook_list tomoyo_hooks[] __ro_after_init = {
+ LSM_HOOK_INIT(cred_prepare, tomoyo_cred_prepare),
+ LSM_HOOK_INIT(bprm_committed_creds, tomoyo_bprm_committed_creds),
+ LSM_HOOK_INIT(task_alloc, tomoyo_task_alloc),
+ LSM_HOOK_INIT(task_free, tomoyo_task_free),
+#ifndef CONFIG_SECURITY_TOMOYO_OMIT_USERSPACE_LOADER
+ LSM_HOOK_INIT(bprm_creds_for_exec, tomoyo_bprm_creds_for_exec),
+#endif
+ LSM_HOOK_INIT(bprm_check_security, tomoyo_bprm_check_security),
+ LSM_HOOK_INIT(file_fcntl, tomoyo_file_fcntl),
+ LSM_HOOK_INIT(file_open, tomoyo_file_open),
+ LSM_HOOK_INIT(file_truncate, tomoyo_file_truncate),
+ LSM_HOOK_INIT(path_truncate, tomoyo_path_truncate),
+ LSM_HOOK_INIT(path_unlink, tomoyo_path_unlink),
+ LSM_HOOK_INIT(path_mkdir, tomoyo_path_mkdir),
+ LSM_HOOK_INIT(path_rmdir, tomoyo_path_rmdir),
+ LSM_HOOK_INIT(path_symlink, tomoyo_path_symlink),
+ LSM_HOOK_INIT(path_mknod, tomoyo_path_mknod),
+ LSM_HOOK_INIT(path_link, tomoyo_path_link),
+ LSM_HOOK_INIT(path_rename, tomoyo_path_rename),
+ LSM_HOOK_INIT(inode_getattr, tomoyo_inode_getattr),
+ LSM_HOOK_INIT(file_ioctl, tomoyo_file_ioctl),
+ LSM_HOOK_INIT(file_ioctl_compat, tomoyo_file_ioctl),
+ LSM_HOOK_INIT(path_chmod, tomoyo_path_chmod),
+ LSM_HOOK_INIT(path_chown, tomoyo_path_chown),
+ LSM_HOOK_INIT(path_chroot, tomoyo_path_chroot),
+ LSM_HOOK_INIT(sb_mount, tomoyo_sb_mount),
+ LSM_HOOK_INIT(sb_umount, tomoyo_sb_umount),
+ LSM_HOOK_INIT(sb_pivotroot, tomoyo_sb_pivotroot),
+ LSM_HOOK_INIT(socket_bind, tomoyo_socket_bind),
+ LSM_HOOK_INIT(socket_connect, tomoyo_socket_connect),
+ LSM_HOOK_INIT(socket_listen, tomoyo_socket_listen),
+ LSM_HOOK_INIT(socket_sendmsg, tomoyo_socket_sendmsg),
+};
+
+int tomoyo_enabled __ro_after_init = 1;
+
+/* Has /sbin/init started? */
+bool tomoyo_policy_loaded;
+
+#ifdef CONFIG_SECURITY_TOMOYO_LKM
+EXPORT_SYMBOL_GPL(tomoyo_blob_sizes);
+EXPORT_SYMBOL_GPL(tomoyo_policy_loaded);
+
+struct tomoyo_operations tomoyo_ops;
+EXPORT_SYMBOL_GPL(tomoyo_ops);
+
+/**
+ * tomoyo_init - Reserve hooks for TOMOYO Linux.
+ *
+ * Returns 0.
+ */
+static int __init tomoyo_init(void)
+{
+ /* register ourselves with the security framework */
+ security_add_hooks(tomoyo_hooks, ARRAY_SIZE(tomoyo_hooks), &tomoyo_lsmid);
+ tomoyo_ops.enabled = tomoyo_enabled;
+ pr_info("Hooks for initializing TOMOYO Linux are ready\n");
+ return 0;
+}
+#else
+/**
+ * tomoyo_init - Register TOMOYO Linux as a LSM module.
+ *
+ * Returns 0.
+ */
+static int __init tomoyo_init(void)
+{
+ struct tomoyo_task *s = tomoyo_task(current);
+
+ /* register ourselves with the security framework */
+ security_add_hooks(tomoyo_hooks, ARRAY_SIZE(tomoyo_hooks),
+ &tomoyo_lsmid);
+ pr_info("TOMOYO Linux initialized\n");
+ s->domain_info = &tomoyo_kernel_domain;
+ atomic_inc(&tomoyo_kernel_domain.users);
+ s->old_domain_info = NULL;
+ tomoyo_mm_init();
+
+ return 0;
+}
+#endif
+
+DEFINE_LSM(tomoyo) = {
+ .name = "tomoyo",
+ .enabled = &tomoyo_enabled,
+ .flags = LSM_FLAG_LEGACY_MAJOR,
+ .blobs = &tomoyo_blob_sizes,
+ .init = tomoyo_init,
+};
diff --git a/security/tomoyo/load_policy.c b/security/tomoyo/load_policy.c
index 363b65be87ab..6a2a72354a64 100644
--- a/security/tomoyo/load_policy.c
+++ b/security/tomoyo/load_policy.c
@@ -97,6 +97,14 @@ void tomoyo_load_policy(const char *filename)
if (!tomoyo_policy_loader_exists())
return;
done = true;
+#ifdef CONFIG_SECURITY_TOMOYO_LKM
+ /* Load tomoyo.ko if not yet loaded. */
+ if (!tomoyo_ops.check_profile)
+ request_module("tomoyo");
+ /* Check if tomoyo.ko was successfully loaded. */
+ if (!tomoyo_ops.check_profile)
+ panic("Failed to load tomoyo module.");
+#endif
pr_info("Calling %s to load policy. Please wait.\n", tomoyo_loader);
argv[0] = (char *) tomoyo_loader;
argv[1] = NULL;
@@ -104,7 +112,11 @@ void tomoyo_load_policy(const char *filename)
envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
envp[2] = NULL;
call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
+#ifdef CONFIG_SECURITY_TOMOYO_LKM
+ tomoyo_ops.check_profile();
+#else
tomoyo_check_profile();
+#endif
}
#endif
diff --git a/security/tomoyo/proxy.c b/security/tomoyo/proxy.c
new file mode 100644
index 000000000000..1618cc0f2af8
--- /dev/null
+++ b/security/tomoyo/proxy.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * security/tomoyo/proxy.c
+ *
+ * Copyright (C) 2005-2011 NTT DATA CORPORATION
+ */
+
+#include <linux/security.h>
+#include "common.h"
+
+#ifdef CONFIG_SECURITY_TOMOYO_LKM
+
+struct tomoyo_task *tomoyo_task(struct task_struct *task)
+{
+ struct tomoyo_task *s = task->security + tomoyo_blob_sizes.lbs_task;
+
+ if (unlikely(!s->domain_info)) {
+ if (likely(task == current)) {
+ s->domain_info = &tomoyo_kernel_domain;
+ atomic_inc(&tomoyo_kernel_domain.users);
+ } else {
+ /* Caller handles s->domain_info == NULL case. */
+ }
+ }
+ return s;
+}
+
+#include "hooks.h"
+
+/**
+ * tomoyo_runtime_init - Register TOMOYO Linux as a loadable LSM module.
+ *
+ * Returns 0 if TOMOYO is enabled, -EINVAL otherwise.
+ */
+static int __init tomoyo_runtime_init(void)
+{
+ const struct tomoyo_hooks tomoyo_hooks = {
+ .cred_prepare = tomoyo_cred_prepare,
+ .bprm_committed_creds = tomoyo_bprm_committed_creds,
+ .task_alloc = tomoyo_task_alloc,
+ .task_free = tomoyo_task_free,
+ .bprm_check_security = tomoyo_bprm_check_security,
+ .file_fcntl = tomoyo_file_fcntl,
+ .file_open = tomoyo_file_open,
+ .file_truncate = tomoyo_file_truncate,
+ .path_truncate = tomoyo_path_truncate,
+ .path_unlink = tomoyo_path_unlink,
+ .path_mkdir = tomoyo_path_mkdir,
+ .path_rmdir = tomoyo_path_rmdir,
+ .path_symlink = tomoyo_path_symlink,
+ .path_mknod = tomoyo_path_mknod,
+ .path_link = tomoyo_path_link,
+ .path_rename = tomoyo_path_rename,
+ .inode_getattr = tomoyo_inode_getattr,
+ .file_ioctl = tomoyo_file_ioctl,
+ .file_ioctl_compat = tomoyo_file_ioctl,
+ .path_chmod = tomoyo_path_chmod,
+ .path_chown = tomoyo_path_chown,
+ .path_chroot = tomoyo_path_chroot,
+ .sb_mount = tomoyo_sb_mount,
+ .sb_umount = tomoyo_sb_umount,
+ .sb_pivotroot = tomoyo_sb_pivotroot,
+ .socket_bind = tomoyo_socket_bind,
+ .socket_connect = tomoyo_socket_connect,
+ .socket_listen = tomoyo_socket_listen,
+ .socket_sendmsg = tomoyo_socket_sendmsg,
+ };
+
+ if (!tomoyo_ops.enabled)
+ return -EINVAL;
+ tomoyo_ops.check_profile = tomoyo_check_profile;
+ pr_info("TOMOYO Linux initialized\n");
+ tomoyo_task(current);
+ tomoyo_mm_init();
+ tomoyo_interface_init();
+ tomoyo_register_hooks(&tomoyo_hooks);
+ return 0;
+}
+module_init(tomoyo_runtime_init);
+MODULE_LICENSE("GPL");
+
+#endif
diff --git a/security/tomoyo/securityfs_if.c b/security/tomoyo/securityfs_if.c
index a2705798476f..a3b821b7f477 100644
--- a/security/tomoyo/securityfs_if.c
+++ b/security/tomoyo/securityfs_if.c
@@ -229,17 +229,19 @@ static void __init tomoyo_create_entry(const char *name, const umode_t mode,
}
/**
- * tomoyo_initerface_init - Initialize /sys/kernel/security/tomoyo/ interface.
+ * tomoyo_interface_init - Initialize /sys/kernel/security/tomoyo/ interface.
*
* Returns 0.
*/
-static int __init tomoyo_initerface_init(void)
+int __init tomoyo_interface_init(void)
{
struct tomoyo_domain_info *domain;
struct dentry *tomoyo_dir;
+#ifndef CONFIG_SECURITY_TOMOYO_LKM
if (!tomoyo_enabled)
return 0;
+#endif
domain = tomoyo_domain();
/* Don't create securityfs entries unless registered. */
if (domain != &tomoyo_kernel_domain)
@@ -270,4 +272,6 @@ static int __init tomoyo_initerface_init(void)
return 0;
}
-fs_initcall(tomoyo_initerface_init);
+#ifndef CONFIG_SECURITY_TOMOYO_LKM
+fs_initcall(tomoyo_interface_init);
+#endif
diff --git a/security/tomoyo/util.c b/security/tomoyo/util.c
index 6799b1122c9d..b851ff377382 100644
--- a/security/tomoyo/util.c
+++ b/security/tomoyo/util.c
@@ -13,9 +13,6 @@
/* Lock for protecting policy. */
DEFINE_MUTEX(tomoyo_policy_lock);
-/* Has /sbin/init started? */
-bool tomoyo_policy_loaded;
-
/*
* Mapping table from "enum tomoyo_mac_index" to
* "enum tomoyo_mac_category_index".
diff --git a/sound/core/control.c b/sound/core/control.c
index 4f55f64c42e1..2f790a7b1e90 100644
--- a/sound/core/control.c
+++ b/sound/core/control.c
@@ -2267,7 +2267,6 @@ static const struct file_operations snd_ctl_f_ops =
.read = snd_ctl_read,
.open = snd_ctl_open,
.release = snd_ctl_release,
- .llseek = no_llseek,
.poll = snd_ctl_poll,
.unlocked_ioctl = snd_ctl_ioctl,
.compat_ioctl = snd_ctl_ioctl_compat,
diff --git a/sound/core/oss/mixer_oss.c b/sound/core/oss/mixer_oss.c
index 33bf9a220ada..668604d0ec9d 100644
--- a/sound/core/oss/mixer_oss.c
+++ b/sound/core/oss/mixer_oss.c
@@ -412,7 +412,6 @@ static const struct file_operations snd_mixer_oss_f_ops =
.owner = THIS_MODULE,
.open = snd_mixer_oss_open,
.release = snd_mixer_oss_release,
- .llseek = no_llseek,
.unlocked_ioctl = snd_mixer_oss_ioctl,
.compat_ioctl = snd_mixer_oss_ioctl_compat,
};
diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c
index 7386982cf40e..4683b9139c56 100644
--- a/sound/core/oss/pcm_oss.c
+++ b/sound/core/oss/pcm_oss.c
@@ -3106,7 +3106,6 @@ static const struct file_operations snd_pcm_oss_f_reg =
.write = snd_pcm_oss_write,
.open = snd_pcm_oss_open,
.release = snd_pcm_oss_release,
- .llseek = no_llseek,
.poll = snd_pcm_oss_poll,
.unlocked_ioctl = snd_pcm_oss_ioctl,
.compat_ioctl = snd_pcm_oss_ioctl_compat,
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 99e39b5359cc..5b9076829ade 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -4115,7 +4115,6 @@ const struct file_operations snd_pcm_f_ops[2] = {
.write_iter = snd_pcm_writev,
.open = snd_pcm_playback_open,
.release = snd_pcm_release,
- .llseek = no_llseek,
.poll = snd_pcm_poll,
.unlocked_ioctl = snd_pcm_ioctl,
.compat_ioctl = snd_pcm_ioctl_compat,
@@ -4129,7 +4128,6 @@ const struct file_operations snd_pcm_f_ops[2] = {
.read_iter = snd_pcm_readv,
.open = snd_pcm_capture_open,
.release = snd_pcm_release,
- .llseek = no_llseek,
.poll = snd_pcm_poll,
.unlocked_ioctl = snd_pcm_ioctl,
.compat_ioctl = snd_pcm_ioctl_compat,
diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c
index 7accf9a1ddf4..03306be5fa02 100644
--- a/sound/core/rawmidi.c
+++ b/sound/core/rawmidi.c
@@ -1784,7 +1784,6 @@ static const struct file_operations snd_rawmidi_f_ops = {
.write = snd_rawmidi_write,
.open = snd_rawmidi_open,
.release = snd_rawmidi_release,
- .llseek = no_llseek,
.poll = snd_rawmidi_poll,
.unlocked_ioctl = snd_rawmidi_ioctl,
.compat_ioctl = snd_rawmidi_ioctl_compat,
diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c
index 6437193e42bf..3930e2f9082f 100644
--- a/sound/core/seq/seq_clientmgr.c
+++ b/sound/core/seq/seq_clientmgr.c
@@ -2722,7 +2722,6 @@ static const struct file_operations snd_seq_f_ops =
.write = snd_seq_write,
.open = snd_seq_open,
.release = snd_seq_release,
- .llseek = no_llseek,
.poll = snd_seq_poll,
.unlocked_ioctl = snd_seq_ioctl,
.compat_ioctl = snd_seq_ioctl_compat,
diff --git a/sound/core/timer.c b/sound/core/timer.c
index 668c40bac318..fbada79380f9 100644
--- a/sound/core/timer.c
+++ b/sound/core/timer.c
@@ -2436,7 +2436,6 @@ static const struct file_operations snd_timer_f_ops =
.read = snd_timer_user_read,
.open = snd_timer_user_open,
.release = snd_timer_user_release,
- .llseek = no_llseek,
.poll = snd_timer_user_poll,
.unlocked_ioctl = snd_timer_user_ioctl,
.compat_ioctl = snd_timer_user_ioctl_compat,
diff --git a/sound/oss/dmasound/dmasound_core.c b/sound/oss/dmasound/dmasound_core.c
index 4b1baf4dd50e..dea2d9b18fc9 100644
--- a/sound/oss/dmasound/dmasound_core.c
+++ b/sound/oss/dmasound/dmasound_core.c
@@ -381,7 +381,6 @@ static long mixer_unlocked_ioctl(struct file *file, u_int cmd, u_long arg)
static const struct file_operations mixer_fops =
{
.owner = THIS_MODULE,
- .llseek = no_llseek,
.unlocked_ioctl = mixer_unlocked_ioctl,
.compat_ioctl = compat_ptr_ioctl,
.open = mixer_open,
@@ -1155,7 +1154,6 @@ static long sq_unlocked_ioctl(struct file *file, u_int cmd, u_long arg)
static const struct file_operations sq_fops =
{
.owner = THIS_MODULE,
- .llseek = no_llseek,
.write = sq_write,
.poll = sq_poll,
.unlocked_ioctl = sq_unlocked_ioctl,
@@ -1351,7 +1349,6 @@ static ssize_t state_read(struct file *file, char __user *buf, size_t count,
static const struct file_operations state_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.read = state_read,
.open = state_open,
.release = state_release,
diff --git a/sound/soc/intel/avs/debugfs.c b/sound/soc/intel/avs/debugfs.c
index 3fc2bbb63369..1767ded4d983 100644
--- a/sound/soc/intel/avs/debugfs.c
+++ b/sound/soc/intel/avs/debugfs.c
@@ -68,7 +68,6 @@ static ssize_t fw_regs_read(struct file *file, char __user *to, size_t count, lo
static const struct file_operations fw_regs_fops = {
.open = simple_open,
.read = fw_regs_read,
- .llseek = no_llseek,
};
static ssize_t debug_window_read(struct file *file, char __user *to, size_t count, loff_t *ppos)
@@ -93,7 +92,6 @@ static ssize_t debug_window_read(struct file *file, char __user *to, size_t coun
static const struct file_operations debug_window_fops = {
.open = simple_open,
.read = debug_window_read,
- .llseek = no_llseek,
};
static ssize_t probe_points_read(struct file *file, char __user *to, size_t count, loff_t *ppos)
@@ -170,7 +168,6 @@ static const struct file_operations probe_points_fops = {
.open = simple_open,
.read = probe_points_read,
.write = probe_points_write,
- .llseek = no_llseek,
};
static ssize_t probe_points_disconnect_write(struct file *file, const char __user *from,
diff --git a/tools/include/linux/linkage.h b/tools/include/linux/linkage.h
index b7183576d8eb..7baaa5898ca2 100644
--- a/tools/include/linux/linkage.h
+++ b/tools/include/linux/linkage.h
@@ -4,7 +4,9 @@
#include <linux/export.h>
#define SYM_FUNC_START(x) .globl x; x:
-
#define SYM_FUNC_END(x)
+#define SYM_DATA_START(x) .globl x; x:
+#define SYM_DATA_START_LOCAL(x) x:
+#define SYM_DATA_END(x)
#endif /* _TOOLS_INCLUDE_LINUX_LINKAGE_H */
diff --git a/tools/objtool/arch/loongarch/decode.c b/tools/objtool/arch/loongarch/decode.c
index aee479d2191c..69b66994f2a1 100644
--- a/tools/objtool/arch/loongarch/decode.c
+++ b/tools/objtool/arch/loongarch/decode.c
@@ -122,7 +122,7 @@ static bool decode_insn_reg2i12_fomat(union loongarch_instruction inst,
switch (inst.reg2i12_format.opcode) {
case addid_op:
if ((inst.reg2i12_format.rd == CFI_SP) || (inst.reg2i12_format.rj == CFI_SP)) {
- /* addi.d sp,sp,si12 or addi.d fp,sp,si12 */
+ /* addi.d sp,sp,si12 or addi.d fp,sp,si12 or addi.d sp,fp,si12 */
insn->immediate = sign_extend64(inst.reg2i12_format.immediate, 11);
ADD_OP(op) {
op->src.type = OP_SRC_ADD;
@@ -132,6 +132,15 @@ static bool decode_insn_reg2i12_fomat(union loongarch_instruction inst,
op->dest.reg = inst.reg2i12_format.rd;
}
}
+ if ((inst.reg2i12_format.rd == CFI_SP) && (inst.reg2i12_format.rj == CFI_FP)) {
+ /* addi.d sp,fp,si12 */
+ struct symbol *func = find_func_containing(insn->sec, insn->offset);
+
+ if (!func)
+ return false;
+
+ func->frame_pointer = true;
+ }
break;
case ldd_op:
if (inst.reg2i12_format.rj == CFI_SP) {
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index d086f207a3d3..6604f5d038aa 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -3043,10 +3043,27 @@ static int update_cfi_state(struct instruction *insn,
break;
}
- if (op->dest.reg == CFI_SP && op->src.reg == CFI_BP) {
+ if (op->dest.reg == CFI_BP && op->src.reg == CFI_SP &&
+ insn->sym->frame_pointer) {
+ /* addi.d fp,sp,imm on LoongArch */
+ if (cfa->base == CFI_SP && cfa->offset == op->src.offset) {
+ cfa->base = CFI_BP;
+ cfa->offset = 0;
+ }
+ break;
+ }
- /* lea disp(%rbp), %rsp */
- cfi->stack_size = -(op->src.offset + regs[CFI_BP].offset);
+ if (op->dest.reg == CFI_SP && op->src.reg == CFI_BP) {
+ /* addi.d sp,fp,imm on LoongArch */
+ if (cfa->base == CFI_BP && cfa->offset == 0) {
+ if (insn->sym->frame_pointer) {
+ cfa->base = CFI_SP;
+ cfa->offset = -op->src.offset;
+ }
+ } else {
+ /* lea disp(%rbp), %rsp */
+ cfi->stack_size = -(op->src.offset + regs[CFI_BP].offset);
+ }
break;
}
diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h
index 2b8a69de4db8..d7e815c2fd15 100644
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -68,6 +68,7 @@ struct symbol {
u8 warned : 1;
u8 embedded_insn : 1;
u8 local_label : 1;
+ u8 frame_pointer : 1;
struct list_head pv_target;
struct reloc *relocs;
};
diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
index 3d1ca9e38b1f..b1256fee3567 100644
--- a/tools/testing/cxl/Kbuild
+++ b/tools/testing/cxl/Kbuild
@@ -14,7 +14,7 @@ ldflags-y += --wrap=cxl_dvsec_rr_decode
ldflags-y += --wrap=devm_cxl_add_rch_dport
ldflags-y += --wrap=cxl_rcd_component_reg_phys
ldflags-y += --wrap=cxl_endpoint_parse_cdat
-ldflags-y += --wrap=cxl_setup_parent_dport
+ldflags-y += --wrap=cxl_dport_init_ras_reporting
DRIVERS := ../../../drivers
CXL_SRC := $(DRIVERS)/cxl
diff --git a/tools/testing/cxl/mock_acpi.c b/tools/testing/cxl/mock_acpi.c
index 55813de26d46..8da94378ccec 100644
--- a/tools/testing/cxl/mock_acpi.c
+++ b/tools/testing/cxl/mock_acpi.c
@@ -18,7 +18,7 @@ struct acpi_device *to_cxl_host_bridge(struct device *host, struct device *dev)
goto out;
}
- if (dev->bus == &platform_bus_type)
+ if (dev_is_platform(dev))
goto out;
adev = to_acpi_device(dev);
diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c
index 129f179b0ac5..ccdd6a504222 100644
--- a/tools/testing/cxl/test/mem.c
+++ b/tools/testing/cxl/test/mem.c
@@ -8,6 +8,7 @@
#include <linux/delay.h>
#include <linux/sizes.h>
#include <linux/bits.h>
+#include <cxl/mailbox.h>
#include <asm/unaligned.h>
#include <crypto/sha2.h>
#include <cxlmem.h>
@@ -534,6 +535,7 @@ static int mock_gsl(struct cxl_mbox_cmd *cmd)
static int mock_get_log(struct cxl_memdev_state *mds, struct cxl_mbox_cmd *cmd)
{
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
struct cxl_mbox_get_log *gl = cmd->payload_in;
u32 offset = le32_to_cpu(gl->offset);
u32 length = le32_to_cpu(gl->length);
@@ -542,7 +544,7 @@ static int mock_get_log(struct cxl_memdev_state *mds, struct cxl_mbox_cmd *cmd)
if (cmd->size_in < sizeof(*gl))
return -EINVAL;
- if (length > mds->payload_size)
+ if (length > cxl_mbox->payload_size)
return -EINVAL;
if (offset + length > sizeof(mock_cel))
return -EINVAL;
@@ -617,12 +619,13 @@ void cxl_mockmem_sanitize_work(struct work_struct *work)
{
struct cxl_memdev_state *mds =
container_of(work, typeof(*mds), security.poll_dwork.work);
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
- mutex_lock(&mds->mbox_mutex);
+ mutex_lock(&cxl_mbox->mbox_mutex);
if (mds->security.sanitize_node)
sysfs_notify_dirent(mds->security.sanitize_node);
mds->security.sanitize_active = false;
- mutex_unlock(&mds->mbox_mutex);
+ mutex_unlock(&cxl_mbox->mbox_mutex);
dev_dbg(mds->cxlds.dev, "sanitize complete\n");
}
@@ -631,6 +634,7 @@ static int mock_sanitize(struct cxl_mockmem_data *mdata,
struct cxl_mbox_cmd *cmd)
{
struct cxl_memdev_state *mds = mdata->mds;
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
int rc = 0;
if (cmd->size_in != 0)
@@ -648,14 +652,14 @@ static int mock_sanitize(struct cxl_mockmem_data *mdata,
return -ENXIO;
}
- mutex_lock(&mds->mbox_mutex);
+ mutex_lock(&cxl_mbox->mbox_mutex);
if (schedule_delayed_work(&mds->security.poll_dwork,
msecs_to_jiffies(mdata->sanitize_timeout))) {
mds->security.sanitize_active = true;
dev_dbg(mds->cxlds.dev, "sanitize issued\n");
} else
rc = -EBUSY;
- mutex_unlock(&mds->mbox_mutex);
+ mutex_unlock(&cxl_mbox->mbox_mutex);
return rc;
}
@@ -1333,12 +1337,13 @@ static int mock_activate_fw(struct cxl_mockmem_data *mdata,
return -EINVAL;
}
-static int cxl_mock_mbox_send(struct cxl_memdev_state *mds,
+static int cxl_mock_mbox_send(struct cxl_mailbox *cxl_mbox,
struct cxl_mbox_cmd *cmd)
{
- struct cxl_dev_state *cxlds = &mds->cxlds;
- struct device *dev = cxlds->dev;
+ struct device *dev = cxl_mbox->host;
struct cxl_mockmem_data *mdata = dev_get_drvdata(dev);
+ struct cxl_memdev_state *mds = mdata->mds;
+ struct cxl_dev_state *cxlds = &mds->cxlds;
int rc = -EIO;
switch (cmd->opcode) {
@@ -1453,6 +1458,17 @@ static ssize_t event_trigger_store(struct device *dev,
}
static DEVICE_ATTR_WO(event_trigger);
+static int cxl_mock_mailbox_create(struct cxl_dev_state *cxlds)
+{
+ int rc;
+
+ rc = cxl_mailbox_init(&cxlds->cxl_mbox, cxlds->dev);
+ if (rc)
+ return rc;
+
+ return 0;
+}
+
static int cxl_mock_mem_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
@@ -1460,6 +1476,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
struct cxl_memdev_state *mds;
struct cxl_dev_state *cxlds;
struct cxl_mockmem_data *mdata;
+ struct cxl_mailbox *cxl_mbox;
int rc;
mdata = devm_kzalloc(dev, sizeof(*mdata), GFP_KERNEL);
@@ -1487,13 +1504,18 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
if (IS_ERR(mds))
return PTR_ERR(mds);
+ cxlds = &mds->cxlds;
+ rc = cxl_mock_mailbox_create(cxlds);
+ if (rc)
+ return rc;
+
+ cxl_mbox = &mds->cxlds.cxl_mbox;
mdata->mds = mds;
- mds->mbox_send = cxl_mock_mbox_send;
- mds->payload_size = SZ_4K;
+ cxl_mbox->mbox_send = cxl_mock_mbox_send;
+ cxl_mbox->payload_size = SZ_4K;
mds->event.buf = (struct cxl_get_event_payload *) mdata->event_buf;
INIT_DELAYED_WORK(&mds->security.poll_dwork, cxl_mockmem_sanitize_work);
- cxlds = &mds->cxlds;
cxlds->serial = pdev->id;
if (is_rcd(pdev))
cxlds->rcd = true;
diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c
index d619672faa49..f4ce96cc11d4 100644
--- a/tools/testing/cxl/test/mock.c
+++ b/tools/testing/cxl/test/mock.c
@@ -228,7 +228,7 @@ int __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds,
}
EXPORT_SYMBOL_NS_GPL(__wrap_cxl_hdm_decode_init, CXL);
-int __wrap_cxl_dvsec_rr_decode(struct device *dev, int dvsec,
+int __wrap_cxl_dvsec_rr_decode(struct device *dev, struct cxl_port *port,
struct cxl_endpoint_dvsec_info *info)
{
int rc = 0, index;
@@ -237,7 +237,7 @@ int __wrap_cxl_dvsec_rr_decode(struct device *dev, int dvsec,
if (ops && ops->is_mock_dev(dev))
rc = 0;
else
- rc = cxl_dvsec_rr_decode(dev, dvsec, info);
+ rc = cxl_dvsec_rr_decode(dev, port, info);
put_cxl_mock_ops(index);
return rc;
@@ -299,17 +299,17 @@ void __wrap_cxl_endpoint_parse_cdat(struct cxl_port *port)
}
EXPORT_SYMBOL_NS_GPL(__wrap_cxl_endpoint_parse_cdat, CXL);
-void __wrap_cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport)
+void __wrap_cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host)
{
int index;
struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
if (!ops || !ops->is_mock_port(dport->dport_dev))
- cxl_setup_parent_dport(host, dport);
+ cxl_dport_init_ras_reporting(dport, host);
put_cxl_mock_ops(index);
}
-EXPORT_SYMBOL_NS_GPL(__wrap_cxl_setup_parent_dport, CXL);
+EXPORT_SYMBOL_NS_GPL(__wrap_cxl_dport_init_ras_reporting, CXL);
MODULE_LICENSE("GPL v2");
MODULE_IMPORT_NS(ACPI);
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 6d9381d60172..7f57abf936e7 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -5,3 +5,7 @@
!*.h
!*.S
!*.sh
+!.gitignore
+!config
+!settings
+!Makefile
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 0c4b254ab56b..960cf6a77198 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -130,6 +130,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/max_vcpuid_cap_test
TEST_GEN_PROGS_x86_64 += x86_64/triple_fault_event_test
TEST_GEN_PROGS_x86_64 += x86_64/recalc_apic_map_test
TEST_GEN_PROGS_x86_64 += access_tracking_perf_test
+TEST_GEN_PROGS_x86_64 += coalesced_io_test
TEST_GEN_PROGS_x86_64 += demand_paging_test
TEST_GEN_PROGS_x86_64 += dirty_log_test
TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
@@ -167,6 +168,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/vpmu_counter_access
TEST_GEN_PROGS_aarch64 += aarch64/no-vgic-v3
TEST_GEN_PROGS_aarch64 += access_tracking_perf_test
TEST_GEN_PROGS_aarch64 += arch_timer
+TEST_GEN_PROGS_aarch64 += coalesced_io_test
TEST_GEN_PROGS_aarch64 += demand_paging_test
TEST_GEN_PROGS_aarch64 += dirty_log_test
TEST_GEN_PROGS_aarch64 += dirty_log_perf_test
@@ -188,6 +190,7 @@ TEST_GEN_PROGS_s390x += s390x/tprot
TEST_GEN_PROGS_s390x += s390x/cmma_test
TEST_GEN_PROGS_s390x += s390x/debug_test
TEST_GEN_PROGS_s390x += s390x/shared_zeropage_test
+TEST_GEN_PROGS_s390x += s390x/ucontrol_test
TEST_GEN_PROGS_s390x += demand_paging_test
TEST_GEN_PROGS_s390x += dirty_log_test
TEST_GEN_PROGS_s390x += guest_print_test
@@ -200,6 +203,7 @@ TEST_GEN_PROGS_s390x += kvm_binary_stats_test
TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test
TEST_GEN_PROGS_riscv += riscv/ebreak_test
TEST_GEN_PROGS_riscv += arch_timer
+TEST_GEN_PROGS_riscv += coalesced_io_test
TEST_GEN_PROGS_riscv += demand_paging_test
TEST_GEN_PROGS_riscv += dirty_log_test
TEST_GEN_PROGS_riscv += get-reg-list
diff --git a/tools/testing/selftests/kvm/coalesced_io_test.c b/tools/testing/selftests/kvm/coalesced_io_test.c
new file mode 100644
index 000000000000..60cb25454899
--- /dev/null
+++ b/tools/testing/selftests/kvm/coalesced_io_test.c
@@ -0,0 +1,236 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include <linux/sizes.h>
+
+#include <kvm_util.h>
+#include <processor.h>
+
+#include "ucall_common.h"
+
+struct kvm_coalesced_io {
+ struct kvm_coalesced_mmio_ring *ring;
+ uint32_t ring_size;
+ uint64_t mmio_gpa;
+ uint64_t *mmio;
+
+ /*
+ * x86-only, but define pio_port for all architectures to minimize the
+ * amount of #ifdeffery and complexity, without having to sacrifice
+ * verbose error messages.
+ */
+ uint8_t pio_port;
+};
+
+static struct kvm_coalesced_io kvm_builtin_io_ring;
+
+#ifdef __x86_64__
+static const int has_pio = 1;
+#else
+static const int has_pio = 0;
+#endif
+
+static void guest_code(struct kvm_coalesced_io *io)
+{
+ int i, j;
+
+ for (;;) {
+ for (j = 0; j < 1 + has_pio; j++) {
+ /*
+ * KVM always leaves one free entry, i.e. exits to
+ * userspace before the last entry is filled.
+ */
+ for (i = 0; i < io->ring_size - 1; i++) {
+#ifdef __x86_64__
+ if (i & 1)
+ outl(io->pio_port, io->pio_port + i);
+ else
+#endif
+ WRITE_ONCE(*io->mmio, io->mmio_gpa + i);
+ }
+#ifdef __x86_64__
+ if (j & 1)
+ outl(io->pio_port, io->pio_port + i);
+ else
+#endif
+ WRITE_ONCE(*io->mmio, io->mmio_gpa + i);
+ }
+ GUEST_SYNC(0);
+
+ WRITE_ONCE(*io->mmio, io->mmio_gpa + i);
+#ifdef __x86_64__
+ outl(io->pio_port, io->pio_port + i);
+#endif
+ }
+}
+
+static void vcpu_run_and_verify_io_exit(struct kvm_vcpu *vcpu,
+ struct kvm_coalesced_io *io,
+ uint32_t ring_start,
+ uint32_t expected_exit)
+{
+ const bool want_pio = expected_exit == KVM_EXIT_IO;
+ struct kvm_coalesced_mmio_ring *ring = io->ring;
+ struct kvm_run *run = vcpu->run;
+ uint32_t pio_value;
+
+ WRITE_ONCE(ring->first, ring_start);
+ WRITE_ONCE(ring->last, ring_start);
+
+ vcpu_run(vcpu);
+
+ /*
+ * Annoyingly, reading PIO data is safe only for PIO exits, otherwise
+ * data_offset is garbage, e.g. an MMIO gpa.
+ */
+ if (run->exit_reason == KVM_EXIT_IO)
+ pio_value = *(uint32_t *)((void *)run + run->io.data_offset);
+ else
+ pio_value = 0;
+
+ TEST_ASSERT((!want_pio && (run->exit_reason == KVM_EXIT_MMIO && run->mmio.is_write &&
+ run->mmio.phys_addr == io->mmio_gpa && run->mmio.len == 8 &&
+ *(uint64_t *)run->mmio.data == io->mmio_gpa + io->ring_size - 1)) ||
+ (want_pio && (run->exit_reason == KVM_EXIT_IO && run->io.port == io->pio_port &&
+ run->io.direction == KVM_EXIT_IO_OUT && run->io.count == 1 &&
+ pio_value == io->pio_port + io->ring_size - 1)),
+ "For start = %u, expected exit on %u-byte %s write 0x%llx = %lx, got exit_reason = %u (%s)\n "
+ "(MMIO addr = 0x%llx, write = %u, len = %u, data = %lx)\n "
+ "(PIO port = 0x%x, write = %u, len = %u, count = %u, data = %x",
+ ring_start, want_pio ? 4 : 8, want_pio ? "PIO" : "MMIO",
+ want_pio ? (unsigned long long)io->pio_port : io->mmio_gpa,
+ (want_pio ? io->pio_port : io->mmio_gpa) + io->ring_size - 1, run->exit_reason,
+ run->exit_reason == KVM_EXIT_MMIO ? "MMIO" : run->exit_reason == KVM_EXIT_IO ? "PIO" : "other",
+ run->mmio.phys_addr, run->mmio.is_write, run->mmio.len, *(uint64_t *)run->mmio.data,
+ run->io.port, run->io.direction, run->io.size, run->io.count, pio_value);
+}
+
+static void vcpu_run_and_verify_coalesced_io(struct kvm_vcpu *vcpu,
+ struct kvm_coalesced_io *io,
+ uint32_t ring_start,
+ uint32_t expected_exit)
+{
+ struct kvm_coalesced_mmio_ring *ring = io->ring;
+ int i;
+
+ vcpu_run_and_verify_io_exit(vcpu, io, ring_start, expected_exit);
+
+ TEST_ASSERT((ring->last + 1) % io->ring_size == ring->first,
+ "Expected ring to be full (minus 1), first = %u, last = %u, max = %u, start = %u",
+ ring->first, ring->last, io->ring_size, ring_start);
+
+ for (i = 0; i < io->ring_size - 1; i++) {
+ uint32_t idx = (ring->first + i) % io->ring_size;
+ struct kvm_coalesced_mmio *entry = &ring->coalesced_mmio[idx];
+
+#ifdef __x86_64__
+ if (i & 1)
+ TEST_ASSERT(entry->phys_addr == io->pio_port &&
+ entry->len == 4 && entry->pio &&
+ *(uint32_t *)entry->data == io->pio_port + i,
+ "Wanted 4-byte port I/O 0x%x = 0x%x in entry %u, got %u-byte %s 0x%llx = 0x%x",
+ io->pio_port, io->pio_port + i, i,
+ entry->len, entry->pio ? "PIO" : "MMIO",
+ entry->phys_addr, *(uint32_t *)entry->data);
+ else
+#endif
+ TEST_ASSERT(entry->phys_addr == io->mmio_gpa &&
+ entry->len == 8 && !entry->pio,
+ "Wanted 8-byte MMIO to 0x%lx = %lx in entry %u, got %u-byte %s 0x%llx = 0x%lx",
+ io->mmio_gpa, io->mmio_gpa + i, i,
+ entry->len, entry->pio ? "PIO" : "MMIO",
+ entry->phys_addr, *(uint64_t *)entry->data);
+ }
+}
+
+static void test_coalesced_io(struct kvm_vcpu *vcpu,
+ struct kvm_coalesced_io *io, uint32_t ring_start)
+{
+ struct kvm_coalesced_mmio_ring *ring = io->ring;
+
+ kvm_vm_register_coalesced_io(vcpu->vm, io->mmio_gpa, 8, false /* pio */);
+#ifdef __x86_64__
+ kvm_vm_register_coalesced_io(vcpu->vm, io->pio_port, 8, true /* pio */);
+#endif
+
+ vcpu_run_and_verify_coalesced_io(vcpu, io, ring_start, KVM_EXIT_MMIO);
+#ifdef __x86_64__
+ vcpu_run_and_verify_coalesced_io(vcpu, io, ring_start, KVM_EXIT_IO);
+#endif
+
+ /*
+ * Verify ucall, which may use non-coalesced MMIO or PIO, generates an
+ * immediate exit.
+ */
+ WRITE_ONCE(ring->first, ring_start);
+ WRITE_ONCE(ring->last, ring_start);
+ vcpu_run(vcpu);
+ TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC);
+ TEST_ASSERT_EQ(ring->first, ring_start);
+ TEST_ASSERT_EQ(ring->last, ring_start);
+
+ /* Verify that non-coalesced MMIO/PIO generates an exit to userspace. */
+ kvm_vm_unregister_coalesced_io(vcpu->vm, io->mmio_gpa, 8, false /* pio */);
+ vcpu_run_and_verify_io_exit(vcpu, io, ring_start, KVM_EXIT_MMIO);
+
+#ifdef __x86_64__
+ kvm_vm_unregister_coalesced_io(vcpu->vm, io->pio_port, 8, true /* pio */);
+ vcpu_run_and_verify_io_exit(vcpu, io, ring_start, KVM_EXIT_IO);
+#endif
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int i;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_COALESCED_MMIO));
+
+#ifdef __x86_64__
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_COALESCED_PIO));
+#endif
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ kvm_builtin_io_ring = (struct kvm_coalesced_io) {
+ /*
+ * The I/O ring is a kernel-allocated page whose address is
+ * relative to each vCPU's run page, with the page offset
+ * provided by KVM in the return of KVM_CAP_COALESCED_MMIO.
+ */
+ .ring = (void *)vcpu->run +
+ (kvm_check_cap(KVM_CAP_COALESCED_MMIO) * getpagesize()),
+
+ /*
+ * The size of the I/O ring is fixed, but KVM defines the sized
+ * based on the kernel's PAGE_SIZE. Thus, userspace must query
+ * the host's page size at runtime to compute the ring size.
+ */
+ .ring_size = (getpagesize() - sizeof(struct kvm_coalesced_mmio_ring)) /
+ sizeof(struct kvm_coalesced_mmio),
+
+ /*
+ * Arbitrary address+port (MMIO mustn't overlap memslots), with
+ * the MMIO GPA identity mapped in the guest.
+ */
+ .mmio_gpa = 4ull * SZ_1G,
+ .mmio = (uint64_t *)(4ull * SZ_1G),
+ .pio_port = 0x80,
+ };
+
+ virt_map(vm, (uint64_t)kvm_builtin_io_ring.mmio, kvm_builtin_io_ring.mmio_gpa, 1);
+
+ sync_global_to_guest(vm, kvm_builtin_io_ring);
+ vcpu_args_set(vcpu, 1, &kvm_builtin_io_ring);
+
+ for (i = 0; i < kvm_builtin_io_ring.ring_size; i++)
+ test_coalesced_io(vcpu, &kvm_builtin_io_ring, i);
+
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/guest_print_test.c b/tools/testing/selftests/kvm/guest_print_test.c
index 8092c2d0f5d6..bcf582852db9 100644
--- a/tools/testing/selftests/kvm/guest_print_test.c
+++ b/tools/testing/selftests/kvm/guest_print_test.c
@@ -107,6 +107,21 @@ static void ucall_abort(const char *assert_msg, const char *expected_assert_msg)
expected_assert_msg, &assert_msg[offset]);
}
+/*
+ * Open code vcpu_run(), sans the UCALL_ABORT handling, so that intentional
+ * guest asserts guest can be verified instead of being reported as failures.
+ */
+static void do_vcpu_run(struct kvm_vcpu *vcpu)
+{
+ int r;
+
+ do {
+ r = __vcpu_run(vcpu);
+ } while (r == -1 && errno == EINTR);
+
+ TEST_ASSERT(!r, KVM_IOCTL_ERROR(KVM_RUN, r));
+}
+
static void run_test(struct kvm_vcpu *vcpu, const char *expected_printf,
const char *expected_assert)
{
@@ -114,7 +129,7 @@ static void run_test(struct kvm_vcpu *vcpu, const char *expected_printf,
struct ucall uc;
while (1) {
- vcpu_run(vcpu);
+ do_vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == UCALL_EXIT_REASON,
"Unexpected exit reason: %u (%s),",
@@ -159,7 +174,7 @@ static void test_limits(void)
vm = vm_create_with_one_vcpu(&vcpu, guest_code_limits);
run = vcpu->run;
- vcpu_run(vcpu);
+ do_vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == UCALL_EXIT_REASON,
"Unexpected exit reason: %u (%s),",
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 63c2aaae51f3..bc7c242480d6 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -428,8 +428,6 @@ const char *vm_guest_mode_string(uint32_t i);
void kvm_vm_free(struct kvm_vm *vmp);
void kvm_vm_restart(struct kvm_vm *vmp);
void kvm_vm_release(struct kvm_vm *vmp);
-int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva,
- size_t len);
void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename);
int kvm_memfd_alloc(size_t size, bool hugepages);
@@ -460,6 +458,32 @@ static inline uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm)
return __vm_ioctl(vm, KVM_RESET_DIRTY_RINGS, NULL);
}
+static inline void kvm_vm_register_coalesced_io(struct kvm_vm *vm,
+ uint64_t address,
+ uint64_t size, bool pio)
+{
+ struct kvm_coalesced_mmio_zone zone = {
+ .addr = address,
+ .size = size,
+ .pio = pio,
+ };
+
+ vm_ioctl(vm, KVM_REGISTER_COALESCED_MMIO, &zone);
+}
+
+static inline void kvm_vm_unregister_coalesced_io(struct kvm_vm *vm,
+ uint64_t address,
+ uint64_t size, bool pio)
+{
+ struct kvm_coalesced_mmio_zone zone = {
+ .addr = address,
+ .size = size,
+ .pio = pio,
+ };
+
+ vm_ioctl(vm, KVM_UNREGISTER_COALESCED_MMIO, &zone);
+}
+
static inline int vm_get_stats_fd(struct kvm_vm *vm)
{
int fd = __vm_ioctl(vm, KVM_GET_STATS_FD, NULL);
diff --git a/tools/testing/selftests/kvm/include/s390x/debug_print.h b/tools/testing/selftests/kvm/include/s390x/debug_print.h
new file mode 100644
index 000000000000..1bf275631cc6
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/s390x/debug_print.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Definition for kernel virtual machines on s390x
+ *
+ * Copyright IBM Corp. 2024
+ *
+ * Authors:
+ * Christoph Schlameuss <schlameuss@linux.ibm.com>
+ */
+
+#ifndef SELFTEST_KVM_DEBUG_PRINT_H
+#define SELFTEST_KVM_DEBUG_PRINT_H
+
+#include "asm/ptrace.h"
+#include "kvm_util.h"
+#include "sie.h"
+
+static inline void print_hex_bytes(const char *name, u64 addr, size_t len)
+{
+ u64 pos;
+
+ pr_debug("%s (%p)\n", name, (void *)addr);
+ pr_debug(" 0/0x00---------|");
+ if (len > 8)
+ pr_debug(" 8/0x08---------|");
+ if (len > 16)
+ pr_debug(" 16/0x10--------|");
+ if (len > 24)
+ pr_debug(" 24/0x18--------|");
+ for (pos = 0; pos < len; pos += 8) {
+ if ((pos % 32) == 0)
+ pr_debug("\n %3lu 0x%.3lx ", pos, pos);
+ pr_debug(" %16lx", *((u64 *)(addr + pos)));
+ }
+ pr_debug("\n");
+}
+
+static inline void print_hex(const char *name, u64 addr)
+{
+ print_hex_bytes(name, addr, 512);
+}
+
+static inline void print_psw(struct kvm_run *run, struct kvm_s390_sie_block *sie_block)
+{
+ pr_debug("flags:0x%x psw:0x%.16llx:0x%.16llx exit:%u %s\n",
+ run->flags,
+ run->psw_mask, run->psw_addr,
+ run->exit_reason, exit_reason_str(run->exit_reason));
+ pr_debug("sie_block psw:0x%.16llx:0x%.16llx\n",
+ sie_block->psw_mask, sie_block->psw_addr);
+}
+
+static inline void print_run(struct kvm_run *run, struct kvm_s390_sie_block *sie_block)
+{
+ print_hex_bytes("run", (u64)run, 0x150);
+ print_hex("sie_block", (u64)sie_block);
+ print_psw(run, sie_block);
+}
+
+static inline void print_regs(struct kvm_run *run)
+{
+ struct kvm_sync_regs *sync_regs = &run->s.regs;
+
+ print_hex_bytes("GPRS", (u64)sync_regs->gprs, 8 * NUM_GPRS);
+ print_hex_bytes("ACRS", (u64)sync_regs->acrs, 4 * NUM_ACRS);
+ print_hex_bytes("CRS", (u64)sync_regs->crs, 8 * NUM_CRS);
+}
+
+#endif /* SELFTEST_KVM_DEBUG_PRINT_H */
diff --git a/tools/testing/selftests/kvm/include/s390x/processor.h b/tools/testing/selftests/kvm/include/s390x/processor.h
index 255c9b990f4c..481bd2fd6a32 100644
--- a/tools/testing/selftests/kvm/include/s390x/processor.h
+++ b/tools/testing/selftests/kvm/include/s390x/processor.h
@@ -21,6 +21,11 @@
#define PAGE_PROTECT 0x200 /* HW read-only bit */
#define PAGE_NOEXEC 0x100 /* HW no-execute bit */
+/* Page size definitions */
+#define PAGE_SHIFT 12
+#define PAGE_SIZE BIT_ULL(PAGE_SHIFT)
+#define PAGE_MASK (~(PAGE_SIZE - 1))
+
/* Is there a portable way to do this? */
static inline void cpu_relax(void)
{
diff --git a/tools/testing/selftests/kvm/include/s390x/sie.h b/tools/testing/selftests/kvm/include/s390x/sie.h
new file mode 100644
index 000000000000..160acd4a1db9
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/s390x/sie.h
@@ -0,0 +1,240 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Definition for kernel virtual machines on s390.
+ *
+ * Adapted copy of struct definition kvm_s390_sie_block from
+ * arch/s390/include/asm/kvm_host.h for use in userspace selftest programs.
+ *
+ * Copyright IBM Corp. 2008, 2024
+ *
+ * Authors:
+ * Christoph Schlameuss <schlameuss@linux.ibm.com>
+ * Carsten Otte <cotte@de.ibm.com>
+ */
+
+#ifndef SELFTEST_KVM_SIE_H
+#define SELFTEST_KVM_SIE_H
+
+#include <linux/types.h>
+
+struct kvm_s390_sie_block {
+#define CPUSTAT_STOPPED 0x80000000
+#define CPUSTAT_WAIT 0x10000000
+#define CPUSTAT_ECALL_PEND 0x08000000
+#define CPUSTAT_STOP_INT 0x04000000
+#define CPUSTAT_IO_INT 0x02000000
+#define CPUSTAT_EXT_INT 0x01000000
+#define CPUSTAT_RUNNING 0x00800000
+#define CPUSTAT_RETAINED 0x00400000
+#define CPUSTAT_TIMING_SUB 0x00020000
+#define CPUSTAT_SIE_SUB 0x00010000
+#define CPUSTAT_RRF 0x00008000
+#define CPUSTAT_SLSV 0x00004000
+#define CPUSTAT_SLSR 0x00002000
+#define CPUSTAT_ZARCH 0x00000800
+#define CPUSTAT_MCDS 0x00000100
+#define CPUSTAT_KSS 0x00000200
+#define CPUSTAT_SM 0x00000080
+#define CPUSTAT_IBS 0x00000040
+#define CPUSTAT_GED2 0x00000010
+#define CPUSTAT_G 0x00000008
+#define CPUSTAT_GED 0x00000004
+#define CPUSTAT_J 0x00000002
+#define CPUSTAT_P 0x00000001
+ __u32 cpuflags; /* 0x0000 */
+ __u32: 1; /* 0x0004 */
+ __u32 prefix : 18;
+ __u32: 1;
+ __u32 ibc : 12;
+ __u8 reserved08[4]; /* 0x0008 */
+#define PROG_IN_SIE BIT(0)
+ __u32 prog0c; /* 0x000c */
+ union {
+ __u8 reserved10[16]; /* 0x0010 */
+ struct {
+ __u64 pv_handle_cpu;
+ __u64 pv_handle_config;
+ };
+ };
+#define PROG_BLOCK_SIE BIT(0)
+#define PROG_REQUEST BIT(1)
+ __u32 prog20; /* 0x0020 */
+ __u8 reserved24[4]; /* 0x0024 */
+ __u64 cputm; /* 0x0028 */
+ __u64 ckc; /* 0x0030 */
+ __u64 epoch; /* 0x0038 */
+ __u32 svcc; /* 0x0040 */
+#define LCTL_CR0 0x8000
+#define LCTL_CR6 0x0200
+#define LCTL_CR9 0x0040
+#define LCTL_CR10 0x0020
+#define LCTL_CR11 0x0010
+#define LCTL_CR14 0x0002
+ __u16 lctl; /* 0x0044 */
+ __s16 icpua; /* 0x0046 */
+#define ICTL_OPEREXC 0x80000000
+#define ICTL_PINT 0x20000000
+#define ICTL_LPSW 0x00400000
+#define ICTL_STCTL 0x00040000
+#define ICTL_ISKE 0x00004000
+#define ICTL_SSKE 0x00002000
+#define ICTL_RRBE 0x00001000
+#define ICTL_TPROT 0x00000200
+ __u32 ictl; /* 0x0048 */
+#define ECA_CEI 0x80000000
+#define ECA_IB 0x40000000
+#define ECA_SIGPI 0x10000000
+#define ECA_MVPGI 0x01000000
+#define ECA_AIV 0x00200000
+#define ECA_VX 0x00020000
+#define ECA_PROTEXCI 0x00002000
+#define ECA_APIE 0x00000008
+#define ECA_SII 0x00000001
+ __u32 eca; /* 0x004c */
+#define ICPT_INST 0x04
+#define ICPT_PROGI 0x08
+#define ICPT_INSTPROGI 0x0C
+#define ICPT_EXTREQ 0x10
+#define ICPT_EXTINT 0x14
+#define ICPT_IOREQ 0x18
+#define ICPT_WAIT 0x1c
+#define ICPT_VALIDITY 0x20
+#define ICPT_STOP 0x28
+#define ICPT_OPEREXC 0x2C
+#define ICPT_PARTEXEC 0x38
+#define ICPT_IOINST 0x40
+#define ICPT_KSS 0x5c
+#define ICPT_MCHKREQ 0x60
+#define ICPT_INT_ENABLE 0x64
+#define ICPT_PV_INSTR 0x68
+#define ICPT_PV_NOTIFY 0x6c
+#define ICPT_PV_PREF 0x70
+ __u8 icptcode; /* 0x0050 */
+ __u8 icptstatus; /* 0x0051 */
+ __u16 ihcpu; /* 0x0052 */
+ __u8 reserved54; /* 0x0054 */
+#define IICTL_CODE_NONE 0x00
+#define IICTL_CODE_MCHK 0x01
+#define IICTL_CODE_EXT 0x02
+#define IICTL_CODE_IO 0x03
+#define IICTL_CODE_RESTART 0x04
+#define IICTL_CODE_SPECIFICATION 0x10
+#define IICTL_CODE_OPERAND 0x11
+ __u8 iictl; /* 0x0055 */
+ __u16 ipa; /* 0x0056 */
+ __u32 ipb; /* 0x0058 */
+ __u32 scaoh; /* 0x005c */
+#define FPF_BPBC 0x20
+ __u8 fpf; /* 0x0060 */
+#define ECB_GS 0x40
+#define ECB_TE 0x10
+#define ECB_SPECI 0x08
+#define ECB_SRSI 0x04
+#define ECB_HOSTPROTINT 0x02
+#define ECB_PTF 0x01
+ __u8 ecb; /* 0x0061 */
+#define ECB2_CMMA 0x80
+#define ECB2_IEP 0x20
+#define ECB2_PFMFI 0x08
+#define ECB2_ESCA 0x04
+#define ECB2_ZPCI_LSI 0x02
+ __u8 ecb2; /* 0x0062 */
+#define ECB3_AISI 0x20
+#define ECB3_AISII 0x10
+#define ECB3_DEA 0x08
+#define ECB3_AES 0x04
+#define ECB3_RI 0x01
+ __u8 ecb3; /* 0x0063 */
+#define ESCA_SCAOL_MASK ~0x3fU
+ __u32 scaol; /* 0x0064 */
+ __u8 sdf; /* 0x0068 */
+ __u8 epdx; /* 0x0069 */
+ __u8 cpnc; /* 0x006a */
+ __u8 reserved6b; /* 0x006b */
+ __u32 todpr; /* 0x006c */
+#define GISA_FORMAT1 0x00000001
+ __u32 gd; /* 0x0070 */
+ __u8 reserved74[12]; /* 0x0074 */
+ __u64 mso; /* 0x0080 */
+ __u64 msl; /* 0x0088 */
+ __u64 psw_mask; /* 0x0090 */
+ __u64 psw_addr; /* 0x0098 */
+ __u64 gg14; /* 0x00a0 */
+ __u64 gg15; /* 0x00a8 */
+ __u8 reservedb0[8]; /* 0x00b0 */
+#define HPID_KVM 0x4
+#define HPID_VSIE 0x5
+ __u8 hpid; /* 0x00b8 */
+ __u8 reservedb9[7]; /* 0x00b9 */
+ union {
+ struct {
+ __u32 eiparams; /* 0x00c0 */
+ __u16 extcpuaddr; /* 0x00c4 */
+ __u16 eic; /* 0x00c6 */
+ };
+ __u64 mcic; /* 0x00c0 */
+ } __packed;
+ __u32 reservedc8; /* 0x00c8 */
+ union {
+ struct {
+ __u16 pgmilc; /* 0x00cc */
+ __u16 iprcc; /* 0x00ce */
+ };
+ __u32 edc; /* 0x00cc */
+ } __packed;
+ union {
+ struct {
+ __u32 dxc; /* 0x00d0 */
+ __u16 mcn; /* 0x00d4 */
+ __u8 perc; /* 0x00d6 */
+ __u8 peratmid; /* 0x00d7 */
+ };
+ __u64 faddr; /* 0x00d0 */
+ } __packed;
+ __u64 peraddr; /* 0x00d8 */
+ __u8 eai; /* 0x00e0 */
+ __u8 peraid; /* 0x00e1 */
+ __u8 oai; /* 0x00e2 */
+ __u8 armid; /* 0x00e3 */
+ __u8 reservede4[4]; /* 0x00e4 */
+ union {
+ __u64 tecmc; /* 0x00e8 */
+ struct {
+ __u16 subchannel_id; /* 0x00e8 */
+ __u16 subchannel_nr; /* 0x00ea */
+ __u32 io_int_parm; /* 0x00ec */
+ __u32 io_int_word; /* 0x00f0 */
+ };
+ } __packed;
+ __u8 reservedf4[8]; /* 0x00f4 */
+#define CRYCB_FORMAT_MASK 0x00000003
+#define CRYCB_FORMAT0 0x00000000
+#define CRYCB_FORMAT1 0x00000001
+#define CRYCB_FORMAT2 0x00000003
+ __u32 crycbd; /* 0x00fc */
+ __u64 gcr[16]; /* 0x0100 */
+ union {
+ __u64 gbea; /* 0x0180 */
+ __u64 sidad;
+ };
+ __u8 reserved188[8]; /* 0x0188 */
+ __u64 sdnxo; /* 0x0190 */
+ __u8 reserved198[8]; /* 0x0198 */
+ __u32 fac; /* 0x01a0 */
+ __u8 reserved1a4[20]; /* 0x01a4 */
+ __u64 cbrlo; /* 0x01b8 */
+ __u8 reserved1c0[8]; /* 0x01c0 */
+#define ECD_HOSTREGMGMT 0x20000000
+#define ECD_MEF 0x08000000
+#define ECD_ETOKENF 0x02000000
+#define ECD_ECC 0x00200000
+ __u32 ecd; /* 0x01c8 */
+ __u8 reserved1cc[18]; /* 0x01cc */
+ __u64 pp; /* 0x01de */
+ __u8 reserved1e6[2]; /* 0x01e6 */
+ __u64 itdba; /* 0x01e8 */
+ __u64 riccbd; /* 0x01f0 */
+ __u64 gvrd; /* 0x01f8 */
+} __packed __aligned(512);
+
+#endif /* SELFTEST_KVM_SIE_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/apic.h b/tools/testing/selftests/kvm/include/x86_64/apic.h
index 0f268b55fa06..51990094effd 100644
--- a/tools/testing/selftests/kvm/include/x86_64/apic.h
+++ b/tools/testing/selftests/kvm/include/x86_64/apic.h
@@ -11,6 +11,7 @@
#include <stdint.h>
#include "processor.h"
+#include "ucall_common.h"
#define APIC_DEFAULT_GPA 0xfee00000ULL
@@ -93,9 +94,27 @@ static inline uint64_t x2apic_read_reg(unsigned int reg)
return rdmsr(APIC_BASE_MSR + (reg >> 4));
}
+static inline uint8_t x2apic_write_reg_safe(unsigned int reg, uint64_t value)
+{
+ return wrmsr_safe(APIC_BASE_MSR + (reg >> 4), value);
+}
+
static inline void x2apic_write_reg(unsigned int reg, uint64_t value)
{
- wrmsr(APIC_BASE_MSR + (reg >> 4), value);
+ uint8_t fault = x2apic_write_reg_safe(reg, value);
+
+ __GUEST_ASSERT(!fault, "Unexpected fault 0x%x on WRMSR(%x) = %lx\n",
+ fault, APIC_BASE_MSR + (reg >> 4), value);
}
+static inline void x2apic_write_reg_fault(unsigned int reg, uint64_t value)
+{
+ uint8_t fault = x2apic_write_reg_safe(reg, value);
+
+ __GUEST_ASSERT(fault == GP_VECTOR,
+ "Wanted #GP on WRMSR(%x) = %lx, got 0x%x\n",
+ APIC_BASE_MSR + (reg >> 4), value, fault);
+}
+
+
#endif /* SELFTEST_KVM_APIC_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86_64/hyperv.h
index fa65b908b13e..6849e2552f1b 100644
--- a/tools/testing/selftests/kvm/include/x86_64/hyperv.h
+++ b/tools/testing/selftests/kvm/include/x86_64/hyperv.h
@@ -186,6 +186,18 @@
#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED \
KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 14)
+/* HYPERV_CPUID_NESTED_FEATURES.EAX */
+#define HV_X64_NESTED_DIRECT_FLUSH \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 17)
+#define HV_X64_NESTED_GUEST_MAPPING_FLUSH \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 18)
+#define HV_X64_NESTED_MSR_BITMAP \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 19)
+
+/* HYPERV_CPUID_NESTED_FEATURES.EBX */
+#define HV_X64_NESTED_EVMCS1_PERF_GLOBAL_CTRL \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EBX, 0)
+
/* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */
#define HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING \
KVM_X86_CPU_FEATURE(HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES, 0, EAX, 1)
@@ -343,4 +355,10 @@ struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm,
/* HV_X64_MSR_TSC_INVARIANT_CONTROL bits */
#define HV_INVARIANT_TSC_EXPOSED BIT_ULL(0)
+const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void);
+const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu);
+void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu);
+
+bool kvm_hv_cpu_has(struct kvm_x86_cpu_feature feature);
+
#endif /* !SELFTEST_KVM_HYPERV_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index a0c1440017bb..e247f99e0473 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -25,6 +25,10 @@ extern bool host_cpu_is_intel;
extern bool host_cpu_is_amd;
extern uint64_t guest_tsc_khz;
+#ifndef MAX_NR_CPUID_ENTRIES
+#define MAX_NR_CPUID_ENTRIES 100
+#endif
+
/* Forced emulation prefix, used to invoke the emulator unconditionally. */
#define KVM_FEP "ud2; .byte 'k', 'v', 'm';"
@@ -908,8 +912,6 @@ static inline void vcpu_xcrs_set(struct kvm_vcpu *vcpu, struct kvm_xcrs *xcrs)
const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid,
uint32_t function, uint32_t index);
const struct kvm_cpuid2 *kvm_get_supported_cpuid(void);
-const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void);
-const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu);
static inline uint32_t kvm_cpu_fms(void)
{
@@ -1009,7 +1011,6 @@ static inline struct kvm_cpuid2 *allocate_kvm_cpuid2(int nr_entries)
}
void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid);
-void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu);
static inline struct kvm_cpuid_entry2 *__vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu,
uint32_t function,
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 56b170b725b3..a2b7df5f1d39 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -712,16 +712,13 @@ void kvm_vm_release(struct kvm_vm *vmp)
}
static void __vm_mem_region_delete(struct kvm_vm *vm,
- struct userspace_mem_region *region,
- bool unlink)
+ struct userspace_mem_region *region)
{
int ret;
- if (unlink) {
- rb_erase(&region->gpa_node, &vm->regions.gpa_tree);
- rb_erase(&region->hva_node, &vm->regions.hva_tree);
- hash_del(&region->slot_node);
- }
+ rb_erase(&region->gpa_node, &vm->regions.gpa_tree);
+ rb_erase(&region->hva_node, &vm->regions.hva_tree);
+ hash_del(&region->slot_node);
region->region.memory_size = 0;
vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region);
@@ -762,7 +759,7 @@ void kvm_vm_free(struct kvm_vm *vmp)
/* Free userspace_mem_regions. */
hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node)
- __vm_mem_region_delete(vmp, region, false);
+ __vm_mem_region_delete(vmp, region);
/* Free sparsebit arrays. */
sparsebit_free(&vmp->vpages_valid);
@@ -794,76 +791,6 @@ int kvm_memfd_alloc(size_t size, bool hugepages)
return fd;
}
-/*
- * Memory Compare, host virtual to guest virtual
- *
- * Input Args:
- * hva - Starting host virtual address
- * vm - Virtual Machine
- * gva - Starting guest virtual address
- * len - number of bytes to compare
- *
- * Output Args: None
- *
- * Input/Output Args: None
- *
- * Return:
- * Returns 0 if the bytes starting at hva for a length of len
- * are equal the guest virtual bytes starting at gva. Returns
- * a value < 0, if bytes at hva are less than those at gva.
- * Otherwise a value > 0 is returned.
- *
- * Compares the bytes starting at the host virtual address hva, for
- * a length of len, to the guest bytes starting at the guest virtual
- * address given by gva.
- */
-int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, vm_vaddr_t gva, size_t len)
-{
- size_t amt;
-
- /*
- * Compare a batch of bytes until either a match is found
- * or all the bytes have been compared.
- */
- for (uintptr_t offset = 0; offset < len; offset += amt) {
- uintptr_t ptr1 = (uintptr_t)hva + offset;
-
- /*
- * Determine host address for guest virtual address
- * at offset.
- */
- uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset);
-
- /*
- * Determine amount to compare on this pass.
- * Don't allow the comparsion to cross a page boundary.
- */
- amt = len - offset;
- if ((ptr1 >> vm->page_shift) != ((ptr1 + amt) >> vm->page_shift))
- amt = vm->page_size - (ptr1 % vm->page_size);
- if ((ptr2 >> vm->page_shift) != ((ptr2 + amt) >> vm->page_shift))
- amt = vm->page_size - (ptr2 % vm->page_size);
-
- assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift));
- assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift));
-
- /*
- * Perform the comparison. If there is a difference
- * return that result to the caller, otherwise need
- * to continue on looking for a mismatch.
- */
- int ret = memcmp((void *)ptr1, (void *)ptr2, amt);
- if (ret != 0)
- return ret;
- }
-
- /*
- * No mismatch found. Let the caller know the two memory
- * areas are equal.
- */
- return 0;
-}
-
static void vm_userspace_mem_region_gpa_insert(struct rb_root *gpa_tree,
struct userspace_mem_region *region)
{
@@ -1270,7 +1197,7 @@ void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa)
*/
void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot)
{
- __vm_mem_region_delete(vm, memslot2region(vm, slot), true);
+ __vm_mem_region_delete(vm, memslot2region(vm, slot));
}
void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t base, uint64_t size,
diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c
index 4ad4492eea1d..20cfe970e3e3 100644
--- a/tools/testing/selftests/kvm/lib/s390x/processor.c
+++ b/tools/testing/selftests/kvm/lib/s390x/processor.c
@@ -14,7 +14,7 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm)
{
vm_paddr_t paddr;
- TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x",
+ TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x",
vm->page_size);
if (vm->pgd_created)
@@ -79,7 +79,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa)
}
/* Fill in page table entry */
- idx = (gva >> 12) & 0x0ffu; /* page index */
+ idx = (gva >> PAGE_SHIFT) & 0x0ffu; /* page index */
if (!(entry[idx] & PAGE_INVALID))
fprintf(stderr,
"WARNING: PTE for gpa=0x%"PRIx64" already set!\n", gpa);
@@ -91,7 +91,7 @@ vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
int ri, idx;
uint64_t *entry;
- TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x",
+ TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x",
vm->page_size);
entry = addr_gpa2hva(vm, vm->pgd);
@@ -103,7 +103,7 @@ vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
entry = addr_gpa2hva(vm, entry[idx] & REGION_ENTRY_ORIGIN);
}
- idx = (gva >> 12) & 0x0ffu; /* page index */
+ idx = (gva >> PAGE_SHIFT) & 0x0ffu; /* page index */
TEST_ASSERT(!(entry[idx] & PAGE_INVALID),
"No page mapping for vm virtual address 0x%lx", gva);
@@ -168,7 +168,7 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
struct kvm_sregs sregs;
struct kvm_vcpu *vcpu;
- TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x",
+ TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x",
vm->page_size);
stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
diff --git a/tools/testing/selftests/kvm/lib/x86_64/hyperv.c b/tools/testing/selftests/kvm/lib/x86_64/hyperv.c
index efb7e7a1354d..15bc8cd583aa 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/hyperv.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/hyperv.c
@@ -8,6 +8,73 @@
#include "processor.h"
#include "hyperv.h"
+const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void)
+{
+ static struct kvm_cpuid2 *cpuid;
+ int kvm_fd;
+
+ if (cpuid)
+ return cpuid;
+
+ cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
+ kvm_fd = open_kvm_dev_path_or_exit();
+
+ kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
+
+ close(kvm_fd);
+ return cpuid;
+}
+
+void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu)
+{
+ static struct kvm_cpuid2 *cpuid_full;
+ const struct kvm_cpuid2 *cpuid_sys, *cpuid_hv;
+ int i, nent = 0;
+
+ if (!cpuid_full) {
+ cpuid_sys = kvm_get_supported_cpuid();
+ cpuid_hv = kvm_get_supported_hv_cpuid();
+
+ cpuid_full = allocate_kvm_cpuid2(cpuid_sys->nent + cpuid_hv->nent);
+ if (!cpuid_full) {
+ perror("malloc");
+ abort();
+ }
+
+ /* Need to skip KVM CPUID leaves 0x400000xx */
+ for (i = 0; i < cpuid_sys->nent; i++) {
+ if (cpuid_sys->entries[i].function >= 0x40000000 &&
+ cpuid_sys->entries[i].function < 0x40000100)
+ continue;
+ cpuid_full->entries[nent] = cpuid_sys->entries[i];
+ nent++;
+ }
+
+ memcpy(&cpuid_full->entries[nent], cpuid_hv->entries,
+ cpuid_hv->nent * sizeof(struct kvm_cpuid_entry2));
+ cpuid_full->nent = nent + cpuid_hv->nent;
+ }
+
+ vcpu_init_cpuid(vcpu, cpuid_full);
+}
+
+const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
+
+ vcpu_ioctl(vcpu, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
+
+ return cpuid;
+}
+
+bool kvm_hv_cpu_has(struct kvm_x86_cpu_feature feature)
+{
+ if (!kvm_has_cap(KVM_CAP_SYS_HYPERV_CPUID))
+ return false;
+
+ return kvm_cpuid_has(kvm_get_supported_hv_cpuid(), feature);
+}
+
struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm,
vm_vaddr_t *p_hv_pages_gva)
{
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 153739f2e201..974bcd2df6d7 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -19,8 +19,6 @@
#define KERNEL_DS 0x10
#define KERNEL_TSS 0x18
-#define MAX_NR_CPUID_ENTRIES 100
-
vm_vaddr_t exception_handlers;
bool host_cpu_is_amd;
bool host_cpu_is_intel;
@@ -566,10 +564,8 @@ void route_exception(struct ex_regs *regs)
if (kvm_fixup_exception(regs))
return;
- ucall_assert(UCALL_UNHANDLED,
- "Unhandled exception in guest", __FILE__, __LINE__,
- "Unhandled exception '0x%lx' at guest RIP '0x%lx'",
- regs->vector, regs->rip);
+ GUEST_FAIL("Unhandled exception '0x%lx' at guest RIP '0x%lx'",
+ regs->vector, regs->rip);
}
static void vm_init_descriptor_tables(struct kvm_vm *vm)
@@ -611,7 +607,7 @@ void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
{
struct ucall uc;
- if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED)
+ if (get_ucall(vcpu, &uc) == UCALL_ABORT)
REPORT_GUEST_ASSERT(uc);
}
@@ -1195,65 +1191,6 @@ void xen_hypercall(uint64_t nr, uint64_t a0, void *a1)
GUEST_ASSERT(!__xen_hypercall(nr, a0, a1));
}
-const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void)
-{
- static struct kvm_cpuid2 *cpuid;
- int kvm_fd;
-
- if (cpuid)
- return cpuid;
-
- cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
- kvm_fd = open_kvm_dev_path_or_exit();
-
- kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
-
- close(kvm_fd);
- return cpuid;
-}
-
-void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu)
-{
- static struct kvm_cpuid2 *cpuid_full;
- const struct kvm_cpuid2 *cpuid_sys, *cpuid_hv;
- int i, nent = 0;
-
- if (!cpuid_full) {
- cpuid_sys = kvm_get_supported_cpuid();
- cpuid_hv = kvm_get_supported_hv_cpuid();
-
- cpuid_full = allocate_kvm_cpuid2(cpuid_sys->nent + cpuid_hv->nent);
- if (!cpuid_full) {
- perror("malloc");
- abort();
- }
-
- /* Need to skip KVM CPUID leaves 0x400000xx */
- for (i = 0; i < cpuid_sys->nent; i++) {
- if (cpuid_sys->entries[i].function >= 0x40000000 &&
- cpuid_sys->entries[i].function < 0x40000100)
- continue;
- cpuid_full->entries[nent] = cpuid_sys->entries[i];
- nent++;
- }
-
- memcpy(&cpuid_full->entries[nent], cpuid_hv->entries,
- cpuid_hv->nent * sizeof(struct kvm_cpuid_entry2));
- cpuid_full->nent = nent + cpuid_hv->nent;
- }
-
- vcpu_init_cpuid(vcpu, cpuid_full);
-}
-
-const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
-
- vcpu_ioctl(vcpu, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
-
- return cpuid;
-}
-
unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
{
const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */
diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
index 49f162573126..e3343f0df9e1 100644
--- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c
+++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
@@ -79,6 +79,7 @@ struct test_params {
useconds_t delay;
uint64_t nr_iterations;
bool partition_vcpu_memory_access;
+ bool disable_slot_zap_quirk;
};
static void run_test(enum vm_guest_mode mode, void *arg)
@@ -89,6 +90,13 @@ static void run_test(enum vm_guest_mode mode, void *arg)
vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1,
VM_MEM_SRC_ANONYMOUS,
p->partition_vcpu_memory_access);
+#ifdef __x86_64__
+ if (p->disable_slot_zap_quirk)
+ vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_SLOT_ZAP_ALL);
+
+ pr_info("Memslot zap quirk %s\n", p->disable_slot_zap_quirk ?
+ "disabled" : "enabled");
+#endif
pr_info("Finished creating vCPUs\n");
@@ -107,11 +115,12 @@ static void run_test(enum vm_guest_mode mode, void *arg)
static void help(char *name)
{
puts("");
- printf("usage: %s [-h] [-m mode] [-d delay_usec]\n"
+ printf("usage: %s [-h] [-m mode] [-d delay_usec] [-q]\n"
" [-b memory] [-v vcpus] [-o] [-i iterations]\n", name);
guest_modes_help();
printf(" -d: add a delay between each iteration of adding and\n"
" deleting a memslot in usec.\n");
+ printf(" -q: Disable memslot zap quirk.\n");
printf(" -b: specify the size of the memory region which should be\n"
" accessed by each vCPU. e.g. 10M or 3G.\n"
" Default: 1G\n");
@@ -137,7 +146,7 @@ int main(int argc, char *argv[])
guest_modes_append_default();
- while ((opt = getopt(argc, argv, "hm:d:b:v:oi:")) != -1) {
+ while ((opt = getopt(argc, argv, "hm:d:qb:v:oi:")) != -1) {
switch (opt) {
case 'm':
guest_modes_cmdline(optarg);
@@ -160,6 +169,12 @@ int main(int argc, char *argv[])
case 'i':
p.nr_iterations = atoi_positive("Number of iterations", optarg);
break;
+ case 'q':
+ p.disable_slot_zap_quirk = true;
+
+ TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) &
+ KVM_X86_QUIRK_SLOT_ZAP_ALL);
+ break;
case 'h':
default:
help(argv[0]);
diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c
index 579a64f97333..893366982f77 100644
--- a/tools/testing/selftests/kvm/memslot_perf_test.c
+++ b/tools/testing/selftests/kvm/memslot_perf_test.c
@@ -113,6 +113,7 @@ static_assert(ATOMIC_BOOL_LOCK_FREE == 2, "atomic bool is not lockless");
static sem_t vcpu_ready;
static bool map_unmap_verify;
+static bool disable_slot_zap_quirk;
static bool verbose;
#define pr_info_v(...) \
@@ -578,6 +579,9 @@ static bool test_memslot_move_prepare(struct vm_data *data,
uint32_t guest_page_size = data->vm->page_size;
uint64_t movesrcgpa, movetestgpa;
+ if (disable_slot_zap_quirk)
+ vm_enable_cap(data->vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_SLOT_ZAP_ALL);
+
movesrcgpa = vm_slot2gpa(data, data->nslots - 1);
if (isactive) {
@@ -896,6 +900,7 @@ static void help(char *name, struct test_args *targs)
pr_info(" -h: print this help screen.\n");
pr_info(" -v: enable verbose mode (not for benchmarking).\n");
pr_info(" -d: enable extra debug checks.\n");
+ pr_info(" -q: Disable memslot zap quirk during memslot move.\n");
pr_info(" -s: specify memslot count cap (-1 means no cap; currently: %i)\n",
targs->nslots);
pr_info(" -f: specify the first test to run (currently: %i; max %zu)\n",
@@ -954,7 +959,7 @@ static bool parse_args(int argc, char *argv[],
uint32_t max_mem_slots;
int opt;
- while ((opt = getopt(argc, argv, "hvds:f:e:l:r:")) != -1) {
+ while ((opt = getopt(argc, argv, "hvdqs:f:e:l:r:")) != -1) {
switch (opt) {
case 'h':
default:
@@ -966,6 +971,11 @@ static bool parse_args(int argc, char *argv[],
case 'd':
map_unmap_verify = true;
break;
+ case 'q':
+ disable_slot_zap_quirk = true;
+ TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) &
+ KVM_X86_QUIRK_SLOT_ZAP_ALL);
+ break;
case 's':
targs->nslots = atoi_paranoid(optarg);
if (targs->nslots <= 1 && targs->nslots != -1) {
diff --git a/tools/testing/selftests/kvm/s390x/cmma_test.c b/tools/testing/selftests/kvm/s390x/cmma_test.c
index b39033844756..e32dd59703a0 100644
--- a/tools/testing/selftests/kvm/s390x/cmma_test.c
+++ b/tools/testing/selftests/kvm/s390x/cmma_test.c
@@ -17,16 +17,17 @@
#include "kvm_util.h"
#include "kselftest.h"
#include "ucall_common.h"
+#include "processor.h"
#define MAIN_PAGE_COUNT 512
#define TEST_DATA_PAGE_COUNT 512
#define TEST_DATA_MEMSLOT 1
-#define TEST_DATA_START_GFN 4096
+#define TEST_DATA_START_GFN PAGE_SIZE
#define TEST_DATA_TWO_PAGE_COUNT 256
#define TEST_DATA_TWO_MEMSLOT 2
-#define TEST_DATA_TWO_START_GFN 8192
+#define TEST_DATA_TWO_START_GFN (2 * PAGE_SIZE)
static char cmma_value_buf[MAIN_PAGE_COUNT + TEST_DATA_PAGE_COUNT];
@@ -66,7 +67,7 @@ static void guest_dirty_test_data(void)
" lghi 5,%[page_count]\n"
/* r5 += r1 */
"2: agfr 5,1\n"
- /* r2 = r1 << 12 */
+ /* r2 = r1 << PAGE_SHIFT */
"1: sllg 2,1,12(0)\n"
/* essa(r4, r2, SET_STABLE) */
" .insn rrf,0xb9ab0000,4,2,1,0\n"
diff --git a/tools/testing/selftests/kvm/s390x/config b/tools/testing/selftests/kvm/s390x/config
new file mode 100644
index 000000000000..23270f2d679f
--- /dev/null
+++ b/tools/testing/selftests/kvm/s390x/config
@@ -0,0 +1,2 @@
+CONFIG_KVM=y
+CONFIG_KVM_S390_UCONTROL=y
diff --git a/tools/testing/selftests/kvm/s390x/debug_test.c b/tools/testing/selftests/kvm/s390x/debug_test.c
index 84313fb27529..ad8095968601 100644
--- a/tools/testing/selftests/kvm/s390x/debug_test.c
+++ b/tools/testing/selftests/kvm/s390x/debug_test.c
@@ -2,12 +2,12 @@
/* Test KVM debugging features. */
#include "kvm_util.h"
#include "test_util.h"
+#include "sie.h"
#include <linux/kvm.h>
#define __LC_SVC_NEW_PSW 0x1c0
#define __LC_PGM_NEW_PSW 0x1d0
-#define ICPT_INSTRUCTION 0x04
#define IPA0_DIAG 0x8300
#define PGM_SPECIFICATION 0x06
@@ -85,7 +85,7 @@ static void test_step_pgm_diag(void)
vm = test_step_int_1(&vcpu, test_step_pgm_diag_guest_code,
__LC_PGM_NEW_PSW, new_psw);
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
- TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_INSTRUCTION);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_INST);
TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa & 0xff00, IPA0_DIAG);
vcpu_ioctl(vcpu, KVM_S390_IRQ, &irq);
vcpu_run(vcpu);
diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c
index f2df7416be84..4374b4cd2a80 100644
--- a/tools/testing/selftests/kvm/s390x/memop.c
+++ b/tools/testing/selftests/kvm/s390x/memop.c
@@ -16,6 +16,7 @@
#include "kvm_util.h"
#include "kselftest.h"
#include "ucall_common.h"
+#include "processor.h"
enum mop_target {
LOGICAL,
@@ -226,9 +227,6 @@ static void memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo,
#define CHECK_N_DO(f, ...) ({ f(__VA_ARGS__, CHECK_ONLY); f(__VA_ARGS__); })
-#define PAGE_SHIFT 12
-#define PAGE_SIZE (1ULL << PAGE_SHIFT)
-#define PAGE_MASK (~(PAGE_SIZE - 1))
#define CR0_FETCH_PROTECTION_OVERRIDE (1UL << (63 - 38))
#define CR0_STORAGE_PROTECTION_OVERRIDE (1UL << (63 - 39))
diff --git a/tools/testing/selftests/kvm/s390x/tprot.c b/tools/testing/selftests/kvm/s390x/tprot.c
index 7a742a673b7c..12d5e1cb62e3 100644
--- a/tools/testing/selftests/kvm/s390x/tprot.c
+++ b/tools/testing/selftests/kvm/s390x/tprot.c
@@ -9,9 +9,8 @@
#include "kvm_util.h"
#include "kselftest.h"
#include "ucall_common.h"
+#include "processor.h"
-#define PAGE_SHIFT 12
-#define PAGE_SIZE (1 << PAGE_SHIFT)
#define CR0_FETCH_PROTECTION_OVERRIDE (1UL << (63 - 38))
#define CR0_STORAGE_PROTECTION_OVERRIDE (1UL << (63 - 39))
@@ -151,7 +150,7 @@ static enum stage perform_next_stage(int *i, bool mapped_0)
* instead.
* In order to skip these tests we detect this inside the guest
*/
- skip = tests[*i].addr < (void *)4096 &&
+ skip = tests[*i].addr < (void *)PAGE_SIZE &&
tests[*i].expected != TRANSL_UNAVAIL &&
!mapped_0;
if (!skip) {
diff --git a/tools/testing/selftests/kvm/s390x/ucontrol_test.c b/tools/testing/selftests/kvm/s390x/ucontrol_test.c
new file mode 100644
index 000000000000..f257beec1430
--- /dev/null
+++ b/tools/testing/selftests/kvm/s390x/ucontrol_test.c
@@ -0,0 +1,332 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test code for the s390x kvm ucontrol interface
+ *
+ * Copyright IBM Corp. 2024
+ *
+ * Authors:
+ * Christoph Schlameuss <schlameuss@linux.ibm.com>
+ */
+#include "debug_print.h"
+#include "kselftest_harness.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "sie.h"
+
+#include <linux/capability.h>
+#include <linux/sizes.h>
+
+#define VM_MEM_SIZE (4 * SZ_1M)
+
+/* so directly declare capget to check caps without libcap */
+int capget(cap_user_header_t header, cap_user_data_t data);
+
+/**
+ * In order to create user controlled virtual machines on S390,
+ * check KVM_CAP_S390_UCONTROL and use the flag KVM_VM_S390_UCONTROL
+ * as privileged user (SYS_ADMIN).
+ */
+void require_ucontrol_admin(void)
+{
+ struct __user_cap_data_struct data[_LINUX_CAPABILITY_U32S_3];
+ struct __user_cap_header_struct hdr = {
+ .version = _LINUX_CAPABILITY_VERSION_3,
+ };
+ int rc;
+
+ rc = capget(&hdr, data);
+ TEST_ASSERT_EQ(0, rc);
+ TEST_REQUIRE((data->effective & CAP_TO_MASK(CAP_SYS_ADMIN)) > 0);
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_UCONTROL));
+}
+
+/* Test program setting some registers and looping */
+extern char test_gprs_asm[];
+asm("test_gprs_asm:\n"
+ "xgr %r0, %r0\n"
+ "lgfi %r1,1\n"
+ "lgfi %r2,2\n"
+ "lgfi %r3,3\n"
+ "lgfi %r4,4\n"
+ "lgfi %r5,5\n"
+ "lgfi %r6,6\n"
+ "lgfi %r7,7\n"
+ "0:\n"
+ " diag 0,0,0x44\n"
+ " ahi %r0,1\n"
+ " j 0b\n"
+);
+
+FIXTURE(uc_kvm)
+{
+ struct kvm_s390_sie_block *sie_block;
+ struct kvm_run *run;
+ uintptr_t base_gpa;
+ uintptr_t code_gpa;
+ uintptr_t base_hva;
+ uintptr_t code_hva;
+ int kvm_run_size;
+ void *vm_mem;
+ int vcpu_fd;
+ int kvm_fd;
+ int vm_fd;
+};
+
+/**
+ * create VM with single vcpu, map kvm_run and SIE control block for easy access
+ */
+FIXTURE_SETUP(uc_kvm)
+{
+ struct kvm_s390_vm_cpu_processor info;
+ int rc;
+
+ require_ucontrol_admin();
+
+ self->kvm_fd = open_kvm_dev_path_or_exit();
+ self->vm_fd = ioctl(self->kvm_fd, KVM_CREATE_VM, KVM_VM_S390_UCONTROL);
+ ASSERT_GE(self->vm_fd, 0);
+
+ kvm_device_attr_get(self->vm_fd, KVM_S390_VM_CPU_MODEL,
+ KVM_S390_VM_CPU_PROCESSOR, &info);
+ TH_LOG("create VM 0x%llx", info.cpuid);
+
+ self->vcpu_fd = ioctl(self->vm_fd, KVM_CREATE_VCPU, 0);
+ ASSERT_GE(self->vcpu_fd, 0);
+
+ self->kvm_run_size = ioctl(self->kvm_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
+ ASSERT_GE(self->kvm_run_size, sizeof(struct kvm_run))
+ TH_LOG(KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, self->kvm_run_size));
+ self->run = (struct kvm_run *)mmap(NULL, self->kvm_run_size,
+ PROT_READ | PROT_WRITE, MAP_SHARED, self->vcpu_fd, 0);
+ ASSERT_NE(self->run, MAP_FAILED);
+ /**
+ * For virtual cpus that have been created with S390 user controlled
+ * virtual machines, the resulting vcpu fd can be memory mapped at page
+ * offset KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of
+ * the virtual cpu's hardware control block.
+ */
+ self->sie_block = (struct kvm_s390_sie_block *)mmap(NULL, PAGE_SIZE,
+ PROT_READ | PROT_WRITE, MAP_SHARED,
+ self->vcpu_fd, KVM_S390_SIE_PAGE_OFFSET << PAGE_SHIFT);
+ ASSERT_NE(self->sie_block, MAP_FAILED);
+
+ TH_LOG("VM created %p %p", self->run, self->sie_block);
+
+ self->base_gpa = 0;
+ self->code_gpa = self->base_gpa + (3 * SZ_1M);
+
+ self->vm_mem = aligned_alloc(SZ_1M, VM_MEM_SIZE);
+ ASSERT_NE(NULL, self->vm_mem) TH_LOG("malloc failed %u", errno);
+ self->base_hva = (uintptr_t)self->vm_mem;
+ self->code_hva = self->base_hva - self->base_gpa + self->code_gpa;
+ struct kvm_s390_ucas_mapping map = {
+ .user_addr = self->base_hva,
+ .vcpu_addr = self->base_gpa,
+ .length = VM_MEM_SIZE,
+ };
+ TH_LOG("ucas map %p %p 0x%llx",
+ (void *)map.user_addr, (void *)map.vcpu_addr, map.length);
+ rc = ioctl(self->vcpu_fd, KVM_S390_UCAS_MAP, &map);
+ ASSERT_EQ(0, rc) TH_LOG("ucas map result %d not expected, %s",
+ rc, strerror(errno));
+
+ TH_LOG("page in %p", (void *)self->base_gpa);
+ rc = ioctl(self->vcpu_fd, KVM_S390_VCPU_FAULT, self->base_gpa);
+ ASSERT_EQ(0, rc) TH_LOG("vcpu fault (%p) result %d not expected, %s",
+ (void *)self->base_hva, rc, strerror(errno));
+
+ self->sie_block->cpuflags &= ~CPUSTAT_STOPPED;
+}
+
+FIXTURE_TEARDOWN(uc_kvm)
+{
+ munmap(self->sie_block, PAGE_SIZE);
+ munmap(self->run, self->kvm_run_size);
+ close(self->vcpu_fd);
+ close(self->vm_fd);
+ close(self->kvm_fd);
+ free(self->vm_mem);
+}
+
+TEST_F(uc_kvm, uc_sie_assertions)
+{
+ /* assert interception of Code 08 (Program Interruption) is set */
+ EXPECT_EQ(0, self->sie_block->ecb & ECB_SPECI);
+}
+
+TEST_F(uc_kvm, uc_attr_mem_limit)
+{
+ u64 limit;
+ struct kvm_device_attr attr = {
+ .group = KVM_S390_VM_MEM_CTRL,
+ .attr = KVM_S390_VM_MEM_LIMIT_SIZE,
+ .addr = (unsigned long)&limit,
+ };
+ int rc;
+
+ rc = ioctl(self->vm_fd, KVM_GET_DEVICE_ATTR, &attr);
+ EXPECT_EQ(0, rc);
+ EXPECT_EQ(~0UL, limit);
+
+ /* assert set not supported */
+ rc = ioctl(self->vm_fd, KVM_SET_DEVICE_ATTR, &attr);
+ EXPECT_EQ(-1, rc);
+ EXPECT_EQ(EINVAL, errno);
+}
+
+TEST_F(uc_kvm, uc_no_dirty_log)
+{
+ struct kvm_dirty_log dlog;
+ int rc;
+
+ rc = ioctl(self->vm_fd, KVM_GET_DIRTY_LOG, &dlog);
+ EXPECT_EQ(-1, rc);
+ EXPECT_EQ(EINVAL, errno);
+}
+
+/**
+ * Assert HPAGE CAP cannot be enabled on UCONTROL VM
+ */
+TEST(uc_cap_hpage)
+{
+ int rc, kvm_fd, vm_fd, vcpu_fd;
+ struct kvm_enable_cap cap = {
+ .cap = KVM_CAP_S390_HPAGE_1M,
+ };
+
+ require_ucontrol_admin();
+
+ kvm_fd = open_kvm_dev_path_or_exit();
+ vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, KVM_VM_S390_UCONTROL);
+ ASSERT_GE(vm_fd, 0);
+
+ /* assert hpages are not supported on ucontrol vm */
+ rc = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_HPAGE_1M);
+ EXPECT_EQ(0, rc);
+
+ /* Test that KVM_CAP_S390_HPAGE_1M can't be enabled for a ucontrol vm */
+ rc = ioctl(vm_fd, KVM_ENABLE_CAP, cap);
+ EXPECT_EQ(-1, rc);
+ EXPECT_EQ(EINVAL, errno);
+
+ /* assert HPAGE CAP is rejected after vCPU creation */
+ vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);
+ ASSERT_GE(vcpu_fd, 0);
+ rc = ioctl(vm_fd, KVM_ENABLE_CAP, cap);
+ EXPECT_EQ(-1, rc);
+ EXPECT_EQ(EBUSY, errno);
+
+ close(vcpu_fd);
+ close(vm_fd);
+ close(kvm_fd);
+}
+
+/* verify SIEIC exit
+ * * fail on codes not expected in the test cases
+ */
+static bool uc_handle_sieic(FIXTURE_DATA(uc_kvm) * self)
+{
+ struct kvm_s390_sie_block *sie_block = self->sie_block;
+ struct kvm_run *run = self->run;
+
+ /* check SIE interception code */
+ pr_info("sieic: 0x%.2x 0x%.4x 0x%.4x\n",
+ run->s390_sieic.icptcode,
+ run->s390_sieic.ipa,
+ run->s390_sieic.ipb);
+ switch (run->s390_sieic.icptcode) {
+ case ICPT_INST:
+ /* end execution in caller on intercepted instruction */
+ pr_info("sie instruction interception\n");
+ return false;
+ case ICPT_OPEREXC:
+ /* operation exception */
+ TEST_FAIL("sie exception on %.4x%.8x", sie_block->ipa, sie_block->ipb);
+ default:
+ TEST_FAIL("UNEXPECTED SIEIC CODE %d", run->s390_sieic.icptcode);
+ }
+ return true;
+}
+
+/* verify VM state on exit */
+static bool uc_handle_exit(FIXTURE_DATA(uc_kvm) * self)
+{
+ struct kvm_run *run = self->run;
+
+ switch (run->exit_reason) {
+ case KVM_EXIT_S390_SIEIC:
+ return uc_handle_sieic(self);
+ default:
+ pr_info("exit_reason %2d not handled\n", run->exit_reason);
+ }
+ return true;
+}
+
+/* run the VM until interrupted */
+static int uc_run_once(FIXTURE_DATA(uc_kvm) * self)
+{
+ int rc;
+
+ rc = ioctl(self->vcpu_fd, KVM_RUN, NULL);
+ print_run(self->run, self->sie_block);
+ print_regs(self->run);
+ pr_debug("run %d / %d %s\n", rc, errno, strerror(errno));
+ return rc;
+}
+
+static void uc_assert_diag44(FIXTURE_DATA(uc_kvm) * self)
+{
+ struct kvm_s390_sie_block *sie_block = self->sie_block;
+
+ /* assert vm was interrupted by diag 0x0044 */
+ TEST_ASSERT_EQ(KVM_EXIT_S390_SIEIC, self->run->exit_reason);
+ TEST_ASSERT_EQ(ICPT_INST, sie_block->icptcode);
+ TEST_ASSERT_EQ(0x8300, sie_block->ipa);
+ TEST_ASSERT_EQ(0x440000, sie_block->ipb);
+}
+
+TEST_F(uc_kvm, uc_gprs)
+{
+ struct kvm_sync_regs *sync_regs = &self->run->s.regs;
+ struct kvm_run *run = self->run;
+ struct kvm_regs regs = {};
+
+ /* Set registers to values that are different from the ones that we expect below */
+ for (int i = 0; i < 8; i++)
+ sync_regs->gprs[i] = 8;
+ run->kvm_dirty_regs |= KVM_SYNC_GPRS;
+
+ /* copy test_gprs_asm to code_hva / code_gpa */
+ TH_LOG("copy code %p to vm mapped memory %p / %p",
+ &test_gprs_asm, (void *)self->code_hva, (void *)self->code_gpa);
+ memcpy((void *)self->code_hva, &test_gprs_asm, PAGE_SIZE);
+
+ /* DAT disabled + 64 bit mode */
+ run->psw_mask = 0x0000000180000000ULL;
+ run->psw_addr = self->code_gpa;
+
+ /* run and expect interception of diag 44 */
+ ASSERT_EQ(0, uc_run_once(self));
+ ASSERT_EQ(false, uc_handle_exit(self));
+ uc_assert_diag44(self);
+
+ /* Retrieve and check guest register values */
+ ASSERT_EQ(0, ioctl(self->vcpu_fd, KVM_GET_REGS, &regs));
+ for (int i = 0; i < 8; i++) {
+ ASSERT_EQ(i, regs.gprs[i]);
+ ASSERT_EQ(i, sync_regs->gprs[i]);
+ }
+
+ /* run and expect interception of diag 44 again */
+ ASSERT_EQ(0, uc_run_once(self));
+ ASSERT_EQ(false, uc_handle_exit(self));
+ uc_assert_diag44(self);
+
+ /* check continued increment of register 0 value */
+ ASSERT_EQ(0, ioctl(self->vcpu_fd, KVM_GET_REGS, &regs));
+ ASSERT_EQ(1, regs.gprs[0]);
+ ASSERT_EQ(1, sync_regs->gprs[0]);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c
index bb8002084f52..a8267628e9ed 100644
--- a/tools/testing/selftests/kvm/set_memory_region_test.c
+++ b/tools/testing/selftests/kvm/set_memory_region_test.c
@@ -175,7 +175,7 @@ static void guest_code_move_memory_region(void)
GUEST_DONE();
}
-static void test_move_memory_region(void)
+static void test_move_memory_region(bool disable_slot_zap_quirk)
{
pthread_t vcpu_thread;
struct kvm_vcpu *vcpu;
@@ -184,6 +184,9 @@ static void test_move_memory_region(void)
vm = spawn_vm(&vcpu, &vcpu_thread, guest_code_move_memory_region);
+ if (disable_slot_zap_quirk)
+ vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_SLOT_ZAP_ALL);
+
hva = addr_gpa2hva(vm, MEM_REGION_GPA);
/*
@@ -266,7 +269,7 @@ static void guest_code_delete_memory_region(void)
GUEST_ASSERT(0);
}
-static void test_delete_memory_region(void)
+static void test_delete_memory_region(bool disable_slot_zap_quirk)
{
pthread_t vcpu_thread;
struct kvm_vcpu *vcpu;
@@ -276,6 +279,9 @@ static void test_delete_memory_region(void)
vm = spawn_vm(&vcpu, &vcpu_thread, guest_code_delete_memory_region);
+ if (disable_slot_zap_quirk)
+ vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_SLOT_ZAP_ALL);
+
/* Delete the memory region, the guest should not die. */
vm_mem_region_delete(vm, MEM_REGION_SLOT);
wait_for_vcpu();
@@ -553,7 +559,10 @@ int main(int argc, char *argv[])
{
#ifdef __x86_64__
int i, loops;
+ int j, disable_slot_zap_quirk = 0;
+ if (kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_SLOT_ZAP_ALL)
+ disable_slot_zap_quirk = 1;
/*
* FIXME: the zero-memslot test fails on aarch64 and s390x because
* KVM_RUN fails with ENOEXEC or EFAULT.
@@ -579,13 +588,17 @@ int main(int argc, char *argv[])
else
loops = 10;
- pr_info("Testing MOVE of in-use region, %d loops\n", loops);
- for (i = 0; i < loops; i++)
- test_move_memory_region();
+ for (j = 0; j <= disable_slot_zap_quirk; j++) {
+ pr_info("Testing MOVE of in-use region, %d loops, slot zap quirk %s\n",
+ loops, j ? "disabled" : "enabled");
+ for (i = 0; i < loops; i++)
+ test_move_memory_region(!!j);
- pr_info("Testing DELETE of in-use region, %d loops\n", loops);
- for (i = 0; i < loops; i++)
- test_delete_memory_region();
+ pr_info("Testing DELETE of in-use region, %d loops, slot zap quirk %s\n",
+ loops, j ? "disabled" : "enabled");
+ for (i = 0; i < loops; i++)
+ test_delete_memory_region(!!j);
+ }
#endif
return 0;
diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86_64/debug_regs.c
index f6b295e0b2d2..76cc2df9238a 100644
--- a/tools/testing/selftests/kvm/x86_64/debug_regs.c
+++ b/tools/testing/selftests/kvm/x86_64/debug_regs.c
@@ -47,15 +47,18 @@ static void guest_code(void)
/*
* Single step test, covers 2 basic instructions and 2 emulated
*
- * Enable interrupts during the single stepping to see that
- * pending interrupt we raised is not handled due to KVM_GUESTDBG_BLOCKIRQ
+ * Enable interrupts during the single stepping to see that pending
+ * interrupt we raised is not handled due to KVM_GUESTDBG_BLOCKIRQ.
+ *
+ * Write MSR_IA32_TSC_DEADLINE to verify that KVM's fastpath handler
+ * exits to userspace due to single-step being enabled.
*/
asm volatile("ss_start: "
"sti\n\t"
"xor %%eax,%%eax\n\t"
"cpuid\n\t"
- "movl $0x1a0,%%ecx\n\t"
- "rdmsr\n\t"
+ "movl $" __stringify(MSR_IA32_TSC_DEADLINE) ", %%ecx\n\t"
+ "wrmsr\n\t"
"cli\n\t"
: : : "eax", "ebx", "ecx", "edx");
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c b/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c
index e192720bfe14..74cf19661309 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c
@@ -242,7 +242,7 @@ int main(int argc, char *argv[])
TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS));
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_DIRECT_TLBFLUSH));
+ TEST_REQUIRE(kvm_hv_cpu_has(HV_X64_NESTED_DIRECT_FLUSH));
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
index b987a3d79715..0ddb63229bcb 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
@@ -157,7 +157,7 @@ int main(int argc, char *argv[])
int stage;
TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_DIRECT_TLBFLUSH));
+ TEST_REQUIRE(kvm_hv_cpu_has(HV_X64_NESTED_DIRECT_FLUSH));
/* Create VM */
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
diff --git a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c b/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c
index 7c70c0da4fb7..2e9197eb1652 100644
--- a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c
+++ b/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c
@@ -160,6 +160,36 @@ static void test_sev(void *guest_code, uint64_t policy)
kvm_vm_free(vm);
}
+static void guest_shutdown_code(void)
+{
+ struct desc_ptr idt;
+
+ /* Clobber the IDT so that #UD is guaranteed to trigger SHUTDOWN. */
+ memset(&idt, 0, sizeof(idt));
+ __asm__ __volatile__("lidt %0" :: "m"(idt));
+
+ __asm__ __volatile__("ud2");
+}
+
+static void test_sev_es_shutdown(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ uint32_t type = KVM_X86_SEV_ES_VM;
+
+ vm = vm_sev_create_with_one_vcpu(type, guest_shutdown_code, &vcpu);
+
+ vm_sev_launch(vm, SEV_POLICY_ES, NULL);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SHUTDOWN,
+ "Wanted SHUTDOWN, got %s",
+ exit_reason_str(vcpu->run->exit_reason));
+
+ kvm_vm_free(vm);
+}
+
int main(int argc, char *argv[])
{
TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV));
@@ -171,6 +201,8 @@ int main(int argc, char *argv[])
test_sev(guest_sev_es_code, SEV_POLICY_ES | SEV_POLICY_NO_DBG);
test_sev(guest_sev_es_code, SEV_POLICY_ES);
+ test_sev_es_shutdown();
+
if (kvm_has_cap(KVM_CAP_XCRS) &&
(xgetbv(0) & XFEATURE_MASK_X87_AVX) == XFEATURE_MASK_X87_AVX) {
test_sync_vmsa(0);
diff --git a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c
index 618cd2442390..88bcca188799 100644
--- a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c
@@ -13,6 +13,7 @@
struct xapic_vcpu {
struct kvm_vcpu *vcpu;
bool is_x2apic;
+ bool has_xavic_errata;
};
static void xapic_guest_code(void)
@@ -31,6 +32,10 @@ static void xapic_guest_code(void)
}
}
+#define X2APIC_RSVD_BITS_MASK (GENMASK_ULL(31, 20) | \
+ GENMASK_ULL(17, 16) | \
+ GENMASK_ULL(13, 13))
+
static void x2apic_guest_code(void)
{
asm volatile("cli");
@@ -41,7 +46,12 @@ static void x2apic_guest_code(void)
uint64_t val = x2apic_read_reg(APIC_IRR) |
x2apic_read_reg(APIC_IRR + 0x10) << 32;
- x2apic_write_reg(APIC_ICR, val);
+ if (val & X2APIC_RSVD_BITS_MASK) {
+ x2apic_write_reg_fault(APIC_ICR, val);
+ } else {
+ x2apic_write_reg(APIC_ICR, val);
+ GUEST_ASSERT_EQ(x2apic_read_reg(APIC_ICR), val);
+ }
GUEST_SYNC(val);
} while (1);
}
@@ -71,27 +81,28 @@ static void ____test_icr(struct xapic_vcpu *x, uint64_t val)
icr = (u64)(*((u32 *)&xapic.regs[APIC_ICR])) |
(u64)(*((u32 *)&xapic.regs[APIC_ICR2])) << 32;
if (!x->is_x2apic) {
- val &= (-1u | (0xffull << (32 + 24)));
- TEST_ASSERT_EQ(icr, val & ~APIC_ICR_BUSY);
- } else {
- TEST_ASSERT_EQ(icr & ~APIC_ICR_BUSY, val & ~APIC_ICR_BUSY);
+ if (!x->has_xavic_errata)
+ val &= (-1u | (0xffull << (32 + 24)));
+ } else if (val & X2APIC_RSVD_BITS_MASK) {
+ return;
}
-}
-#define X2APIC_RSVED_BITS_MASK (GENMASK_ULL(31,20) | \
- GENMASK_ULL(17,16) | \
- GENMASK_ULL(13,13))
+ if (x->has_xavic_errata)
+ TEST_ASSERT_EQ(icr & ~APIC_ICR_BUSY, val & ~APIC_ICR_BUSY);
+ else
+ TEST_ASSERT_EQ(icr, val & ~APIC_ICR_BUSY);
+}
static void __test_icr(struct xapic_vcpu *x, uint64_t val)
{
- if (x->is_x2apic) {
- /* Hardware writing vICR register requires reserved bits 31:20,
- * 17:16 and 13 kept as zero to avoid #GP exception. Data value
- * written to vICR should mask out those bits above.
- */
- val &= ~X2APIC_RSVED_BITS_MASK;
- }
- ____test_icr(x, val | APIC_ICR_BUSY);
+ /*
+ * The BUSY bit is reserved on both AMD and Intel, but only AMD treats
+ * it is as _must_ be zero. Intel simply ignores the bit. Don't test
+ * the BUSY bit for x2APIC, as there is no single correct behavior.
+ */
+ if (!x->is_x2apic)
+ ____test_icr(x, val | APIC_ICR_BUSY);
+
____test_icr(x, val & ~(u64)APIC_ICR_BUSY);
}
@@ -231,6 +242,15 @@ int main(int argc, char *argv[])
vm = vm_create_with_one_vcpu(&x.vcpu, xapic_guest_code);
x.is_x2apic = false;
+ /*
+ * AMD's AVIC implementation is buggy (fails to clear the ICR BUSY bit),
+ * and also diverges from KVM with respect to ICR2[23:0] (KVM and Intel
+ * drops writes, AMD does not). Account for the errata when checking
+ * that KVM reads back what was written.
+ */
+ x.has_xavic_errata = host_cpu_is_amd &&
+ get_kvm_amd_param_bool("avic");
+
vcpu_clear_cpuid_feature(x.vcpu, X86_FEATURE_X2APIC);
virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
diff --git a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
index e149d0574961..2585087cdf5c 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
@@ -10,6 +10,7 @@
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
+#include "hyperv.h"
#define HCALL_REGION_GPA 0xc0000000ULL
#define HCALL_REGION_SLOT 10
diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c
index fc90af2a97b8..bcc73b4e805c 100644
--- a/tools/testing/selftests/mm/pagemap_ioctl.c
+++ b/tools/testing/selftests/mm/pagemap_ioctl.c
@@ -15,7 +15,7 @@
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <math.h>
-#include <asm-generic/unistd.h>
+#include <asm/unistd.h>
#include <pthread.h>
#include <sys/resource.h>
#include <assert.h>
diff --git a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c
index 27f6fdf11969..644915862af8 100644
--- a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c
+++ b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c
@@ -131,6 +131,8 @@ asm (
"_start:\n\t"
#ifdef __x86_64__
"mov %rsp,%rdi\n\t"
+ "and $-16,%rsp\n\t"
+ "sub $8,%rsp\n\t"
"jmp c_main"
#else
"push %esp\n\t"
diff --git a/tools/testing/shared/maple-shared.h b/tools/testing/shared/maple-shared.h
index 3d847edd149d..dc4d30f3860b 100644
--- a/tools/testing/shared/maple-shared.h
+++ b/tools/testing/shared/maple-shared.h
@@ -1,4 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ */
+#ifndef __MAPLE_SHARED_H__
+#define __MAPLE_SHARED_H__
#define CONFIG_DEBUG_MAPLE_TREE
#define CONFIG_MAPLE_SEARCH
@@ -7,3 +9,5 @@
#include <stdlib.h>
#include <time.h>
#include "linux/init.h"
+
+#endif /* __MAPLE_SHARED_H__ */
diff --git a/tools/testing/shared/shared.h b/tools/testing/shared/shared.h
index f08f683812ad..13fb4d39966b 100644
--- a/tools/testing/shared/shared.h
+++ b/tools/testing/shared/shared.h
@@ -1,4 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SHARED_H__
+#define __SHARED_H__
#include <linux/types.h>
#include <linux/bug.h>
@@ -31,3 +33,5 @@
#ifndef dump_stack
#define dump_stack() assert(0)
#endif
+
+#endif /* __SHARED_H__ */
diff --git a/tools/testing/shared/shared.mk b/tools/testing/shared/shared.mk
index a05f0588513a..a6bc51d0b0bf 100644
--- a/tools/testing/shared/shared.mk
+++ b/tools/testing/shared/shared.mk
@@ -15,7 +15,9 @@ SHARED_DEPS = Makefile ../shared/shared.mk ../shared/*.h generated/map-shift.h \
../../../include/linux/maple_tree.h \
../../../include/linux/radix-tree.h \
../../../lib/radix-tree.h \
- ../../../include/linux/idr.h
+ ../../../include/linux/idr.h \
+ ../../../lib/maple_tree.c \
+ ../../../lib/test_maple_tree.c
ifndef SHIFT
SHIFT=3
diff --git a/tools/testing/shared/xarray-shared.h b/tools/testing/shared/xarray-shared.h
index ac2d16ff53ae..d50de7884803 100644
--- a/tools/testing/shared/xarray-shared.h
+++ b/tools/testing/shared/xarray-shared.h
@@ -1,4 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0+ */
+#ifndef __XARRAY_SHARED_H__
+#define __XARRAY_SHARED_H__
#define XA_DEBUG
#include "shared.h"
+
+#endif /* __XARRAY_SHARED_H__ */
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index 1b90acb6e3fe..375d6285475e 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -40,27 +40,6 @@ static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev,
return 1;
}
-static int coalesced_mmio_has_room(struct kvm_coalesced_mmio_dev *dev, u32 last)
-{
- struct kvm_coalesced_mmio_ring *ring;
- unsigned avail;
-
- /* Are we able to batch it ? */
-
- /* last is the first free entry
- * check if we don't meet the first used entry
- * there is always one unused entry in the buffer
- */
- ring = dev->kvm->coalesced_mmio_ring;
- avail = (ring->first - last - 1) % KVM_COALESCED_MMIO_MAX;
- if (avail == 0) {
- /* full */
- return 0;
- }
-
- return 1;
-}
-
static int coalesced_mmio_write(struct kvm_vcpu *vcpu,
struct kvm_io_device *this, gpa_t addr,
int len, const void *val)
@@ -74,9 +53,15 @@ static int coalesced_mmio_write(struct kvm_vcpu *vcpu,
spin_lock(&dev->kvm->ring_lock);
+ /*
+ * last is the index of the entry to fill. Verify userspace hasn't
+ * set last to be out of range, and that there is room in the ring.
+ * Leave one entry free in the ring so that userspace can differentiate
+ * between an empty ring and a full ring.
+ */
insert = READ_ONCE(ring->last);
- if (!coalesced_mmio_has_room(dev, insert) ||
- insert >= KVM_COALESCED_MMIO_MAX) {
+ if (insert >= KVM_COALESCED_MMIO_MAX ||
+ (insert + 1) % KVM_COALESCED_MMIO_MAX == READ_ONCE(ring->first)) {
spin_unlock(&dev->kvm->ring_lock);
return -EOPNOTSUPP;
}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f416d5e3f9c0..05cbb2548d99 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -136,8 +136,8 @@ static int kvm_no_compat_open(struct inode *inode, struct file *file)
#define KVM_COMPAT(c) .compat_ioctl = kvm_no_compat_ioctl, \
.open = kvm_no_compat_open
#endif
-static int hardware_enable_all(void);
-static void hardware_disable_all(void);
+static int kvm_enable_virtualization(void);
+static void kvm_disable_virtualization(void);
static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
@@ -1220,7 +1220,7 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
if (r)
goto out_err_no_arch_destroy_vm;
- r = hardware_enable_all();
+ r = kvm_enable_virtualization();
if (r)
goto out_err_no_disable;
@@ -1263,7 +1263,7 @@ out_no_coalesced_mmio:
mmu_notifier_unregister(&kvm->mmu_notifier, current->mm);
#endif
out_err_no_mmu_notifier:
- hardware_disable_all();
+ kvm_disable_virtualization();
out_err_no_disable:
kvm_arch_destroy_vm(kvm);
out_err_no_arch_destroy_vm:
@@ -1360,7 +1360,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
#endif
kvm_arch_free_vm(kvm);
preempt_notifier_dec();
- hardware_disable_all();
+ kvm_disable_virtualization();
mmdrop(mm);
}
@@ -3270,6 +3270,9 @@ static int __kvm_read_guest_page(struct kvm_memory_slot *slot, gfn_t gfn,
int r;
unsigned long addr;
+ if (WARN_ON_ONCE(offset + len > PAGE_SIZE))
+ return -EFAULT;
+
addr = gfn_to_hva_memslot_prot(slot, gfn, NULL);
if (kvm_is_error_hva(addr))
return -EFAULT;
@@ -3343,6 +3346,9 @@ static int __kvm_read_guest_atomic(struct kvm_memory_slot *slot, gfn_t gfn,
int r;
unsigned long addr;
+ if (WARN_ON_ONCE(offset + len > PAGE_SIZE))
+ return -EFAULT;
+
addr = gfn_to_hva_memslot_prot(slot, gfn, NULL);
if (kvm_is_error_hva(addr))
return -EFAULT;
@@ -3373,6 +3379,9 @@ static int __kvm_write_guest_page(struct kvm *kvm,
int r;
unsigned long addr;
+ if (WARN_ON_ONCE(offset + len > PAGE_SIZE))
+ return -EFAULT;
+
addr = gfn_to_hva_memslot(memslot, gfn);
if (kvm_is_error_hva(addr))
return -EFAULT;
@@ -3576,7 +3585,7 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
int ret;
while ((seg = next_segment(len, offset)) != 0) {
- ret = kvm_write_guest_page(kvm, gfn, zero_page, offset, len);
+ ret = kvm_write_guest_page(kvm, gfn, zero_page, offset, seg);
if (ret < 0)
return ret;
offset = 0;
@@ -5566,137 +5575,67 @@ static struct miscdevice kvm_dev = {
};
#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING
+static bool enable_virt_at_load = true;
+module_param(enable_virt_at_load, bool, 0444);
+
__visible bool kvm_rebooting;
EXPORT_SYMBOL_GPL(kvm_rebooting);
-static DEFINE_PER_CPU(bool, hardware_enabled);
+static DEFINE_PER_CPU(bool, virtualization_enabled);
+static DEFINE_MUTEX(kvm_usage_lock);
static int kvm_usage_count;
-static int __hardware_enable_nolock(void)
+__weak void kvm_arch_enable_virtualization(void)
+{
+
+}
+
+__weak void kvm_arch_disable_virtualization(void)
+{
+
+}
+
+static int kvm_enable_virtualization_cpu(void)
{
- if (__this_cpu_read(hardware_enabled))
+ if (__this_cpu_read(virtualization_enabled))
return 0;
- if (kvm_arch_hardware_enable()) {
+ if (kvm_arch_enable_virtualization_cpu()) {
pr_info("kvm: enabling virtualization on CPU%d failed\n",
raw_smp_processor_id());
return -EIO;
}
- __this_cpu_write(hardware_enabled, true);
+ __this_cpu_write(virtualization_enabled, true);
return 0;
}
-static void hardware_enable_nolock(void *failed)
-{
- if (__hardware_enable_nolock())
- atomic_inc(failed);
-}
-
static int kvm_online_cpu(unsigned int cpu)
{
- int ret = 0;
-
/*
* Abort the CPU online process if hardware virtualization cannot
* be enabled. Otherwise running VMs would encounter unrecoverable
* errors when scheduled to this CPU.
*/
- mutex_lock(&kvm_lock);
- if (kvm_usage_count)
- ret = __hardware_enable_nolock();
- mutex_unlock(&kvm_lock);
- return ret;
+ return kvm_enable_virtualization_cpu();
}
-static void hardware_disable_nolock(void *junk)
+static void kvm_disable_virtualization_cpu(void *ign)
{
- /*
- * Note, hardware_disable_all_nolock() tells all online CPUs to disable
- * hardware, not just CPUs that successfully enabled hardware!
- */
- if (!__this_cpu_read(hardware_enabled))
+ if (!__this_cpu_read(virtualization_enabled))
return;
- kvm_arch_hardware_disable();
+ kvm_arch_disable_virtualization_cpu();
- __this_cpu_write(hardware_enabled, false);
+ __this_cpu_write(virtualization_enabled, false);
}
static int kvm_offline_cpu(unsigned int cpu)
{
- mutex_lock(&kvm_lock);
- if (kvm_usage_count)
- hardware_disable_nolock(NULL);
- mutex_unlock(&kvm_lock);
+ kvm_disable_virtualization_cpu(NULL);
return 0;
}
-static void hardware_disable_all_nolock(void)
-{
- BUG_ON(!kvm_usage_count);
-
- kvm_usage_count--;
- if (!kvm_usage_count)
- on_each_cpu(hardware_disable_nolock, NULL, 1);
-}
-
-static void hardware_disable_all(void)
-{
- cpus_read_lock();
- mutex_lock(&kvm_lock);
- hardware_disable_all_nolock();
- mutex_unlock(&kvm_lock);
- cpus_read_unlock();
-}
-
-static int hardware_enable_all(void)
-{
- atomic_t failed = ATOMIC_INIT(0);
- int r;
-
- /*
- * Do not enable hardware virtualization if the system is going down.
- * If userspace initiated a forced reboot, e.g. reboot -f, then it's
- * possible for an in-flight KVM_CREATE_VM to trigger hardware enabling
- * after kvm_reboot() is called. Note, this relies on system_state
- * being set _before_ kvm_reboot(), which is why KVM uses a syscore ops
- * hook instead of registering a dedicated reboot notifier (the latter
- * runs before system_state is updated).
- */
- if (system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF ||
- system_state == SYSTEM_RESTART)
- return -EBUSY;
-
- /*
- * When onlining a CPU, cpu_online_mask is set before kvm_online_cpu()
- * is called, and so on_each_cpu() between them includes the CPU that
- * is being onlined. As a result, hardware_enable_nolock() may get
- * invoked before kvm_online_cpu(), which also enables hardware if the
- * usage count is non-zero. Disable CPU hotplug to avoid attempting to
- * enable hardware multiple times.
- */
- cpus_read_lock();
- mutex_lock(&kvm_lock);
-
- r = 0;
-
- kvm_usage_count++;
- if (kvm_usage_count == 1) {
- on_each_cpu(hardware_enable_nolock, &failed, 1);
-
- if (atomic_read(&failed)) {
- hardware_disable_all_nolock();
- r = -EBUSY;
- }
- }
-
- mutex_unlock(&kvm_lock);
- cpus_read_unlock();
-
- return r;
-}
-
static void kvm_shutdown(void)
{
/*
@@ -5712,34 +5651,32 @@ static void kvm_shutdown(void)
*/
pr_info("kvm: exiting hardware virtualization\n");
kvm_rebooting = true;
- on_each_cpu(hardware_disable_nolock, NULL, 1);
+ on_each_cpu(kvm_disable_virtualization_cpu, NULL, 1);
}
static int kvm_suspend(void)
{
/*
* Secondary CPUs and CPU hotplug are disabled across the suspend/resume
- * callbacks, i.e. no need to acquire kvm_lock to ensure the usage count
- * is stable. Assert that kvm_lock is not held to ensure the system
- * isn't suspended while KVM is enabling hardware. Hardware enabling
- * can be preempted, but the task cannot be frozen until it has dropped
- * all locks (userspace tasks are frozen via a fake signal).
+ * callbacks, i.e. no need to acquire kvm_usage_lock to ensure the usage
+ * count is stable. Assert that kvm_usage_lock is not held to ensure
+ * the system isn't suspended while KVM is enabling hardware. Hardware
+ * enabling can be preempted, but the task cannot be frozen until it has
+ * dropped all locks (userspace tasks are frozen via a fake signal).
*/
- lockdep_assert_not_held(&kvm_lock);
+ lockdep_assert_not_held(&kvm_usage_lock);
lockdep_assert_irqs_disabled();
- if (kvm_usage_count)
- hardware_disable_nolock(NULL);
+ kvm_disable_virtualization_cpu(NULL);
return 0;
}
static void kvm_resume(void)
{
- lockdep_assert_not_held(&kvm_lock);
+ lockdep_assert_not_held(&kvm_usage_lock);
lockdep_assert_irqs_disabled();
- if (kvm_usage_count)
- WARN_ON_ONCE(__hardware_enable_nolock());
+ WARN_ON_ONCE(kvm_enable_virtualization_cpu());
}
static struct syscore_ops kvm_syscore_ops = {
@@ -5747,13 +5684,95 @@ static struct syscore_ops kvm_syscore_ops = {
.resume = kvm_resume,
.shutdown = kvm_shutdown,
};
+
+static int kvm_enable_virtualization(void)
+{
+ int r;
+
+ guard(mutex)(&kvm_usage_lock);
+
+ if (kvm_usage_count++)
+ return 0;
+
+ kvm_arch_enable_virtualization();
+
+ r = cpuhp_setup_state(CPUHP_AP_KVM_ONLINE, "kvm/cpu:online",
+ kvm_online_cpu, kvm_offline_cpu);
+ if (r)
+ goto err_cpuhp;
+
+ register_syscore_ops(&kvm_syscore_ops);
+
+ /*
+ * Undo virtualization enabling and bail if the system is going down.
+ * If userspace initiated a forced reboot, e.g. reboot -f, then it's
+ * possible for an in-flight operation to enable virtualization after
+ * syscore_shutdown() is called, i.e. without kvm_shutdown() being
+ * invoked. Note, this relies on system_state being set _before_
+ * kvm_shutdown(), e.g. to ensure either kvm_shutdown() is invoked
+ * or this CPU observes the impending shutdown. Which is why KVM uses
+ * a syscore ops hook instead of registering a dedicated reboot
+ * notifier (the latter runs before system_state is updated).
+ */
+ if (system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF ||
+ system_state == SYSTEM_RESTART) {
+ r = -EBUSY;
+ goto err_rebooting;
+ }
+
+ return 0;
+
+err_rebooting:
+ unregister_syscore_ops(&kvm_syscore_ops);
+ cpuhp_remove_state(CPUHP_AP_KVM_ONLINE);
+err_cpuhp:
+ kvm_arch_disable_virtualization();
+ --kvm_usage_count;
+ return r;
+}
+
+static void kvm_disable_virtualization(void)
+{
+ guard(mutex)(&kvm_usage_lock);
+
+ if (--kvm_usage_count)
+ return;
+
+ unregister_syscore_ops(&kvm_syscore_ops);
+ cpuhp_remove_state(CPUHP_AP_KVM_ONLINE);
+ kvm_arch_disable_virtualization();
+}
+
+static int kvm_init_virtualization(void)
+{
+ if (enable_virt_at_load)
+ return kvm_enable_virtualization();
+
+ return 0;
+}
+
+static void kvm_uninit_virtualization(void)
+{
+ if (enable_virt_at_load)
+ kvm_disable_virtualization();
+}
#else /* CONFIG_KVM_GENERIC_HARDWARE_ENABLING */
-static int hardware_enable_all(void)
+static int kvm_enable_virtualization(void)
+{
+ return 0;
+}
+
+static int kvm_init_virtualization(void)
{
return 0;
}
-static void hardware_disable_all(void)
+static void kvm_disable_virtualization(void)
+{
+
+}
+
+static void kvm_uninit_virtualization(void)
{
}
@@ -6186,7 +6205,6 @@ static const struct file_operations stat_fops_per_vm = {
.release = kvm_debugfs_release,
.read = simple_attr_read,
.write = simple_attr_write,
- .llseek = no_llseek,
};
static int vm_stat_get(void *_offset, u64 *val)
@@ -6455,15 +6473,6 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module)
int r;
int cpu;
-#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING
- r = cpuhp_setup_state_nocalls(CPUHP_AP_KVM_ONLINE, "kvm/cpu:online",
- kvm_online_cpu, kvm_offline_cpu);
- if (r)
- return r;
-
- register_syscore_ops(&kvm_syscore_ops);
-#endif
-
/* A kmem cache lets us meet the alignment requirements of fx_save. */
if (!vcpu_align)
vcpu_align = __alignof__(struct kvm_vcpu);
@@ -6474,10 +6483,8 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module)
offsetofend(struct kvm_vcpu, stats_id)
- offsetof(struct kvm_vcpu, arch),
NULL);
- if (!kvm_vcpu_cache) {
- r = -ENOMEM;
- goto err_vcpu_cache;
- }
+ if (!kvm_vcpu_cache)
+ return -ENOMEM;
for_each_possible_cpu(cpu) {
if (!alloc_cpumask_var_node(&per_cpu(cpu_kick_mask, cpu),
@@ -6511,6 +6518,10 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module)
kvm_gmem_init(module);
+ r = kvm_init_virtualization();
+ if (r)
+ goto err_virt;
+
/*
* Registration _must_ be the very last thing done, as this exposes
* /dev/kvm to userspace, i.e. all infrastructure must be setup!
@@ -6524,6 +6535,8 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module)
return 0;
err_register:
+ kvm_uninit_virtualization();
+err_virt:
kvm_vfio_ops_exit();
err_vfio:
kvm_async_pf_deinit();
@@ -6534,11 +6547,6 @@ err_cpu_kick_mask:
for_each_possible_cpu(cpu)
free_cpumask_var(per_cpu(cpu_kick_mask, cpu));
kmem_cache_destroy(kvm_vcpu_cache);
-err_vcpu_cache:
-#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING
- unregister_syscore_ops(&kvm_syscore_ops);
- cpuhp_remove_state_nocalls(CPUHP_AP_KVM_ONLINE);
-#endif
return r;
}
EXPORT_SYMBOL_GPL(kvm_init);
@@ -6554,16 +6562,14 @@ void kvm_exit(void)
*/
misc_deregister(&kvm_dev);
+ kvm_uninit_virtualization();
+
debugfs_remove_recursive(kvm_debugfs_dir);
for_each_possible_cpu(cpu)
free_cpumask_var(per_cpu(cpu_kick_mask, cpu));
kmem_cache_destroy(kvm_vcpu_cache);
kvm_vfio_ops_exit();
kvm_async_pf_deinit();
-#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING
- unregister_syscore_ops(&kvm_syscore_ops);
- cpuhp_remove_state_nocalls(CPUHP_AP_KVM_ONLINE);
-#endif
kvm_irqfd_exit();
}
EXPORT_SYMBOL_GPL(kvm_exit);