: Indicate if transfer speed mode setting should be skipped. + * [no]fua: Disable or enable FUA (Force Unit Access) + support for devices supporting this feature. + * dump_id: Dump IDENTIFY data. * disable: Disable this device. diff --git a/Documentation/netlink/specs/fou.yaml b/Documentation/netlink/specs/fou.yaml new file mode 100644 index 000000000000..e5753a30a29a --- /dev/null +++ b/Documentation/netlink/specs/fou.yaml @@ -0,0 +1,130 @@ +name: fou + +protocol: genetlink-legacy + +doc: | + Foo-over-UDP. + +c-family-name: fou-genl-name +c-version-name: fou-genl-version +max-by-define: true +kernel-policy: global + +definitions: + - + type: enum + name: encap_type + name-prefix: fou-encap- + enum-name: + entries: [ unspec, direct, gue ] + +attribute-sets: + - + name: fou + name-prefix: fou-attr- + attributes: + - + name: unspec + type: unused + - + name: port + type: u16 + byte-order: big-endian + - + name: af + type: u8 + - + name: ipproto + type: u8 + checks: + min: 1 + - + name: type + type: u8 + - + name: remcsum_nopartial + type: flag + - + name: local_v4 + type: u32 + - + name: local_v6 + type: binary + checks: + min-len: 16 + - + name: peer_v4 + type: u32 + - + name: peer_v6 + type: binary + checks: + min-len: 16 + - + name: peer_port + type: u16 + byte-order: big-endian + - + name: ifindex + type: s32 + +operations: + list: + - + name: unspec + doc: unused + + - + name: add + doc: Add port. + attribute-set: fou + + dont-validate: [ strict, dump ] + flags: [ admin-perm ] + + do: + request: &all_attrs + attributes: + - port + - ipproto + - type + - remcsum_nopartial + - local_v4 + - peer_v4 + - local_v6 + - peer_v6 + - peer_port + - ifindex + + - + name: del + doc: Delete port. + attribute-set: fou + + dont-validate: [ strict, dump ] + flags: [ admin-perm ] + + do: + request: &select_attrs + attributes: + - af + - ifindex + - port + - peer_port + - local_v4 + - peer_v4 + - local_v6 + - peer_v6 + + - + name: get + doc: Get tunnel info. + attribute-set: fou + dont-validate: [ strict, dump ] + + do: + request: *select_attrs + reply: *all_attrs + + dump: + reply: *all_attrs diff --git a/Makefile b/Makefile index 1f4696571746..7ae0fee5b79d 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 161 +SUBLEVEL = 162 EXTRAVERSION = NAME = Curry Ramen diff --git a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi index 6b0d4bc6c541..e502360de601 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi +++ b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi @@ -1819,8 +1819,12 @@ remoteproc_nsp0: remoteproc@1b300000 { clocks = <&rpmhcc RPMH_CXO_CLK>; clock-names = "xo"; - power-domains = <&rpmhpd SC8280XP_NSP>; - power-domain-names = "nsp"; + power-domains = <&rpmhpd SC8280XP_NSP>, + <&rpmhpd SC8280XP_CX>, + <&rpmhpd SC8280XP_MXC>; + power-domain-names = "nsp", + "cx", + "mxc"; memory-region = <&pil_nsp0_mem>; @@ -1950,8 +1954,12 @@ remoteproc_nsp1: remoteproc@21300000 { clocks = <&rpmhcc RPMH_CXO_CLK>; clock-names = "xo"; - power-domains = <&rpmhpd SC8280XP_NSP>; - power-domain-names = "nsp"; + power-domains = <&rpmhpd SC8280XP_NSP>, + <&rpmhpd SC8280XP_CX>, + <&rpmhpd SC8280XP_MXC>; + power-domain-names = "nsp", + "cx", + "mxc"; memory-region = <&pil_nsp1_mem>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts b/arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts index 1eb287a3f8c0..f3d419799708 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts @@ -427,7 +427,6 @@ &pcie_phy { &pcie0 { ep-gpios = <&gpio2 RK_PD4 GPIO_ACTIVE_HIGH>; - max-link-speed = <2>; num-lanes = <2>; pinctrl-names = "default"; status = "okay"; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-nanopi-r4s.dts b/arch/arm64/boot/dts/rockchip/rk3399-nanopi-r4s.dts index 6a6b36c36ce2..0a55aa44effe 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-nanopi-r4s.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-nanopi-r4s.dts @@ -73,7 +73,6 @@ &i2c4 { }; &pcie0 { - max-link-speed = <1>; num-lanes = <1>; vpcie3v3-supply = <&vcc3v3_sys>; }; diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 788597a6b6a2..aa51a4091c93 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -397,7 +397,7 @@ int swsusp_arch_suspend(void) * Memory allocated by get_safe_page() will be dealt with by the hibernate code, * we don't need to free it here. */ -int swsusp_arch_resume(void) +int __nocfi swsusp_arch_resume(void) { int rc; void *zero_page; diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 2461bbffe7d4..9fd7f78d11d1 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -317,12 +317,28 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user) fpsimd_flush_task_state(current); /* From now, fpsimd_thread_switch() won't touch thread.sve_state */ + if (sve.flags & SVE_SIG_FLAG_SM) { + sme_alloc(current, false); + if (!current->thread.za_state) + return -ENOMEM; + } + sve_alloc(current, true); if (!current->thread.sve_state) { clear_thread_flag(TIF_SVE); return -ENOMEM; } + if (sve.flags & SVE_SIG_FLAG_SM) { + current->thread.svcr |= SVCR_SM_MASK; + set_thread_flag(TIF_SME); + } else { + current->thread.svcr &= ~SVCR_SM_MASK; + set_thread_flag(TIF_SVE); + } + + current->thread.fp_type = FP_STATE_SVE; + err = __copy_from_user(current->thread.sve_state, (char __user const *)user->sve + SVE_SIG_REGS_OFFSET, @@ -330,12 +346,6 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user) if (err) return -EFAULT; - if (sve.flags & SVE_SIG_FLAG_SM) - current->thread.svcr |= SVCR_SM_MASK; - else - set_thread_flag(TIF_SVE); - current->thread.fp_type = FP_STATE_SVE; - fpsimd_only: /* copy the FP and status/control registers */ /* restore_sigframe() already checked that user->fpsimd != NULL. */ @@ -433,6 +443,10 @@ static int restore_za_context(struct user_ctxs *user) fpsimd_flush_task_state(current); /* From now, fpsimd_thread_switch() won't touch thread.sve_state */ + sve_alloc(current, false); + if (!current->thread.sve_state) + return -ENOMEM; + sme_alloc(current, true); if (!current->thread.za_state) { current->thread.svcr &= ~SVCR_ZA_MASK; diff --git a/arch/loongarch/kernel/perf_event.c b/arch/loongarch/kernel/perf_event.c index 1563bf47f3e2..bcc296b1583a 100644 --- a/arch/loongarch/kernel/perf_event.c +++ b/arch/loongarch/kernel/perf_event.c @@ -637,6 +637,18 @@ static const struct loongarch_perf_event *loongarch_pmu_map_cache_event(u64 conf return pev; } +static inline bool loongarch_pmu_event_requires_counter(const struct perf_event *event) +{ + switch (event->attr.type) { + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + case PERF_TYPE_RAW: + return true; + default: + return false; + } +} + static int validate_group(struct perf_event *event) { struct cpu_hw_events fake_cpuc; @@ -644,15 +656,18 @@ static int validate_group(struct perf_event *event) memset(&fake_cpuc, 0, sizeof(fake_cpuc)); - if (loongarch_pmu_alloc_counter(&fake_cpuc, &leader->hw) < 0) + if (loongarch_pmu_event_requires_counter(leader) && + loongarch_pmu_alloc_counter(&fake_cpuc, &leader->hw) < 0) return -EINVAL; for_each_sibling_event(sibling, leader) { - if (loongarch_pmu_alloc_counter(&fake_cpuc, &sibling->hw) < 0) + if (loongarch_pmu_event_requires_counter(sibling) && + loongarch_pmu_alloc_counter(&fake_cpuc, &sibling->hw) < 0) return -EINVAL; } - if (loongarch_pmu_alloc_counter(&fake_cpuc, &event->hw) < 0) + if (loongarch_pmu_event_requires_counter(event) && + loongarch_pmu_alloc_counter(&fake_cpuc, &event->hw) < 0) return -EINVAL; return 0; diff --git a/arch/riscv/include/asm/compat.h b/arch/riscv/include/asm/compat.h index 2ac955b51148..c790ac7bfdd8 100644 --- a/arch/riscv/include/asm/compat.h +++ b/arch/riscv/include/asm/compat.h @@ -2,7 +2,7 @@ #ifndef __ASM_COMPAT_H #define __ASM_COMPAT_H -#define COMPAT_UTS_MACHINE "riscv\0\0" +#define COMPAT_UTS_MACHINE "riscv32\0\0" /* * Architecture specific compatibility types diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index a3e1da86181f..b95cab9846dd 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -1421,13 +1421,22 @@ static inline bool intel_pmu_has_bts_period(struct perf_event *event, u64 period struct hw_perf_event *hwc = &event->hw; unsigned int hw_event, bts_event; - if (event->attr.freq) + /* + * Only use BTS for fixed rate period==1 events. + */ + if (event->attr.freq || period != 1) + return false; + + /* + * BTS doesn't virtualize. + */ + if (event->attr.exclude_host) return false; hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); - return hw_event == bts_event && period == 1; + return hw_event == bts_event; } static inline bool intel_pmu_has_bts(struct perf_event *event) diff --git a/arch/x86/include/asm/kfence.h b/arch/x86/include/asm/kfence.h index ff5c7134a37a..acf9ffa1a171 100644 --- a/arch/x86/include/asm/kfence.h +++ b/arch/x86/include/asm/kfence.h @@ -42,10 +42,34 @@ static inline bool kfence_protect_page(unsigned long addr, bool protect) { unsigned int level; pte_t *pte = lookup_address(addr, &level); + pteval_t val; if (WARN_ON(!pte || level != PG_LEVEL_4K)) return false; + val = pte_val(*pte); + + /* + * protect requires making the page not-present. If the PTE is + * already in the right state, there's nothing to do. + */ + if (protect != !!(val & _PAGE_PRESENT)) + return true; + + /* + * Otherwise, invert the entire PTE. This avoids writing out an + * L1TF-vulnerable PTE (not present, without the high address bits + * set). + */ + set_pte(pte, __pte(~val)); + + /* + * If the page was protected (non-present) and we're making it + * present, there is no need to flush the TLB at all. + */ + if (!protect) + return true; + /* * We need to avoid IPIs, as we may get KFENCE allocations or faults * with interrupts disabled. Therefore, the below is best-effort, and @@ -53,11 +77,6 @@ static inline bool kfence_protect_page(unsigned long addr, bool protect) * lazy fault handling takes care of faults after the page is PRESENT. */ - if (protect) - set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT)); - else - set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT)); - /* * Flush this CPU's TLB, assuming whoever did the allocation/free is * likely to continue running on this CPU. diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 72c33ec07ba7..6a52b217cfe2 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -728,7 +728,8 @@ static __init bool get_mem_config(void) if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) return __get_mem_config_intel(&hw_res->r_resctrl); - else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) return __rdt_get_mem_config_amd(&hw_res->r_resctrl); return false; @@ -863,7 +864,8 @@ static __init void rdt_init_res_defs(void) { if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) rdt_init_res_defs_intel(); - else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) rdt_init_res_defs_amd(); } @@ -894,8 +896,19 @@ void resctrl_cpu_detect(struct cpuinfo_x86 *c) c->x86_cache_occ_scale = ebx; c->x86_cache_mbm_width_offset = eax & 0xff; - if (c->x86_vendor == X86_VENDOR_AMD && !c->x86_cache_mbm_width_offset) - c->x86_cache_mbm_width_offset = MBM_CNTR_WIDTH_OFFSET_AMD; + if (!c->x86_cache_mbm_width_offset) { + switch (c->x86_vendor) { + case X86_VENDOR_AMD: + c->x86_cache_mbm_width_offset = MBM_CNTR_WIDTH_OFFSET_AMD; + break; + case X86_VENDOR_HYGON: + c->x86_cache_mbm_width_offset = MBM_CNTR_WIDTH_OFFSET_HYGON; + break; + default: + /* Leave c->x86_cache_mbm_width_offset as 0 */ + break; + } + } } } diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 4761d489a117..c699f3a035cc 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -31,6 +31,9 @@ #define MAX_MBA_BW_AMD 0x800 #define MBM_CNTR_WIDTH_OFFSET_AMD 20 +/* Hygon MBM counter width as an offset from MBM_CNTR_WIDTH_BASE */ +#define MBM_CNTR_WIDTH_OFFSET_HYGON 8 + #define RMID_VAL_ERROR BIT_ULL(63) #define RMID_VAL_UNAVAIL BIT_ULL(62) /* diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 0068648cb7fb..2805ce8f0259 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -294,10 +294,29 @@ EXPORT_SYMBOL_GPL(fpu_enable_guest_xfd_features); #ifdef CONFIG_X86_64 void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd) { + struct fpstate *fpstate = guest_fpu->fpstate; + fpregs_lock(); - guest_fpu->fpstate->xfd = xfd; - if (guest_fpu->fpstate->in_use) - xfd_update_state(guest_fpu->fpstate); + + /* + * KVM's guest ABI is that setting XFD[i]=1 *can* immediately revert the + * save state to its initial configuration. Likewise, KVM_GET_XSAVE does + * the same as XSAVE and returns XSTATE_BV[i]=0 whenever XFD[i]=1. + * + * If the guest's FPU state is in hardware, just update XFD: the XSAVE + * in fpu_swap_kvm_fpstate will clear XSTATE_BV[i] whenever XFD[i]=1. + * + * If however the guest's FPU state is NOT resident in hardware, clear + * disabled components in XSTATE_BV now, or a subsequent XRSTOR will + * attempt to load disabled components and generate #NM _in the host_. + */ + if (xfd && test_thread_flag(TIF_NEED_FPU_LOAD)) + fpstate->regs.xsave.header.xfeatures &= ~xfd; + + fpstate->xfd = xfd; + if (fpstate->in_use) + xfd_update_state(fpstate); + fpregs_unlock(); } EXPORT_SYMBOL_GPL(fpu_update_guest_xfd); @@ -405,6 +424,13 @@ int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, if (ustate->xsave.header.xfeatures & ~xcr0) return -EINVAL; + /* + * Disabled features must be in their initial state, otherwise XRSTOR + * causes an exception. + */ + if (WARN_ON_ONCE(ustate->xsave.header.xfeatures & kstate->xfd)) + return -EINVAL; + /* * Nullify @vpkru to preserve its current value if PKRU's bit isn't set * in the header. KVM's odd ABI is to leave PKRU untouched in this diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1eb685d4b452..3d63609b91de 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5341,9 +5341,18 @@ static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, struct kvm_xsave *guest_xsave) { + union fpregs_state *xstate = (union fpregs_state *)guest_xsave->region; + if (fpstate_is_confidential(&vcpu->arch.guest_fpu)) return 0; + /* + * For backwards compatibility, do not expect disabled features to be in + * their initial state. XSTATE_BV[i] must still be cleared whenever + * XFD[i]=1, or XRSTOR would cause a #NM. + */ + xstate->xsave.header.xfeatures &= ~vcpu->arch.guest_fpu.fpstate->xfd; + return fpu_copy_uabi_to_guest_fpstate(&vcpu->arch.guest_fpu, guest_xsave->region, kvm_caps.supported_xcr0, diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 2fc007752ceb..54f8fe0ea5a9 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -835,8 +835,6 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, force_sig_pkuerr((void __user *)address, pkey); else force_sig_fault(SIGSEGV, si_code, (void __user *)address); - - local_irq_disable(); } static noinline void @@ -1429,15 +1427,12 @@ handle_page_fault(struct pt_regs *regs, unsigned long error_code, do_kern_addr_fault(regs, error_code, address); } else { do_user_addr_fault(regs, error_code, address); - /* - * User address page fault handling might have reenabled - * interrupts. Fixing up all potential exit points of - * do_user_addr_fault() and its leaf functions is just not - * doable w/o creating an unholy mess or turning the code - * upside down. - */ - local_irq_disable(); } + /* + * page fault handling might have reenabled interrupts, + * make sure to disable them again. + */ + local_irq_disable(); } DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_fault) diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index e0b0ec0f8245..7e3dc0cead96 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -111,12 +111,12 @@ void __init kernel_randomize_memory(void) /* * Adapt physical memory region size based on available memory, - * except when CONFIG_PCI_P2PDMA is enabled. P2PDMA exposes the - * device BAR space assuming the direct map space is large enough - * for creating a ZONE_DEVICE mapping in the direct map corresponding - * to the physical BAR address. + * except when CONFIG_ZONE_DEVICE is enabled. ZONE_DEVICE wants to map + * any physical address into the direct-map. KASLR wants to reliably + * steal some physical address bits. Those design choices are in direct + * conflict. */ - if (!IS_ENABLED(CONFIG_PCI_P2PDMA) && (memory_tb < kaslr_regions[0].size_tb)) + if (!IS_ENABLED(CONFIG_ZONE_DEVICE) && (memory_tb < kaslr_regions[0].size_tb)) kaslr_regions[0].size_tb = memory_tb; /* diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index ef596fc10465..f314192b6de8 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -531,8 +531,12 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css, struct blkg_iostat_set *bis = per_cpu_ptr(blkg->iostat_cpu, cpu); memset(bis, 0, sizeof(*bis)); + + /* Re-initialize the cleared blkg_iostat_set */ + u64_stats_init(&bis->sync); } memset(&blkg->iostat, 0, sizeof(blkg->iostat)); + u64_stats_init(&blkg->iostat.sync); for (i = 0; i < BLKCG_MAX_POLS; i++) { struct blkcg_policy *pol = blkcg_policy[i]; diff --git a/crypto/authencesn.c b/crypto/authencesn.c index b60e61b1904c..6487b35851d5 100644 --- a/crypto/authencesn.c +++ b/crypto/authencesn.c @@ -191,6 +191,9 @@ static int crypto_authenc_esn_encrypt(struct aead_request *req) struct scatterlist *src, *dst; int err; + if (assoclen < 8) + return -EINVAL; + sg_init_table(areq_ctx->src, 2); src = scatterwalk_ffwd(areq_ctx->src, req->src, assoclen); dst = src; @@ -284,6 +287,9 @@ static int crypto_authenc_esn_decrypt(struct aead_request *req) u32 tmp[2]; int err; + if (assoclen < 8) + return -EINVAL; + cryptlen -= authsize; if (req->src != dst) { diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 14bcfebf20b8..96b13b89f0a7 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2513,6 +2513,28 @@ static void ata_dev_config_chs(struct ata_device *dev) dev->heads, dev->sectors); } +static void ata_dev_config_fua(struct ata_device *dev) +{ + /* Ignore FUA support if its use is disabled globally */ + if (!libata_fua) + goto nofua; + + /* Ignore devices without support for WRITE DMA FUA EXT */ + if (!(dev->flags & ATA_DFLAG_LBA48) || !ata_id_has_fua(dev->id))Linux 6.1.162Greg Kroah-Hartman undefinedlinux-kernel@vger.kernel.org, akpm@linux-foundation.org, torvalds@linux-foundation.org, stable@vger.kernel.org undefined undefined undefined undefined undefined undefined