key access functions to work with the new gmap. Acked-by: Heiko Carstens Signed-off-by: Claudio Imbrenda --- arch/s390/Kconfig | 2 +- arch/s390/include/asm/kvm_host.h | 5 +- arch/s390/include/asm/mmu_context.h | 4 - arch/s390/include/asm/tlb.h | 3 - arch/s390/include/asm/uaccess.h | 70 +-- arch/s390/include/asm/uv.h | 1 - arch/s390/kernel/uv.c | 114 +--- arch/s390/kvm/Makefile | 2 +- arch/s390/kvm/diag.c | 2 +- arch/s390/kvm/gaccess.c | 885 +++++++++++++++++----------- arch/s390/kvm/gaccess.h | 18 +- arch/s390/kvm/gmap-vsie.c | 141 ----- arch/s390/kvm/intercept.c | 15 +- arch/s390/kvm/interrupt.c | 6 +- arch/s390/kvm/kvm-s390.c | 781 +++++++----------------- arch/s390/kvm/kvm-s390.h | 19 +- arch/s390/kvm/priv.c | 213 +++---- arch/s390/kvm/pv.c | 174 ++++-- arch/s390/kvm/vsie.c | 168 +++--- arch/s390/lib/uaccess.c | 184 +----- arch/s390/mm/gmap_helpers.c | 38 +- 21 files changed, 1119 insertions(+), 1726 deletions(-) delete mode 100644 arch/s390/kvm/gmap-vsie.c diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 0e5fad5f06ca..8270754985e9 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -33,7 +33,7 @@ config GENERIC_LOCKBREAK def_bool y if PREEMPTION config PGSTE - def_bool y if KVM + def_bool n config AUDIT_ARCH def_bool y diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 816776a8a8e3..64a50f0862aa 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -442,7 +442,7 @@ struct kvm_vcpu_arch { bool acrs_loaded; struct kvm_s390_pv_vcpu pv; union diag318_info diag318_info; - void *mc; /* Placeholder */ + struct kvm_s390_mmu_cache *mc; }; struct kvm_vm_stat { @@ -636,6 +636,8 @@ struct kvm_s390_pv { struct mutex import_lock; }; +struct kvm_s390_mmu_cache; + struct kvm_arch { struct esca_block *sca; debug_info_t *dbf; @@ -675,6 +677,7 @@ struct kvm_arch { struct kvm_s390_pv pv; struct list_head kzdev_list; spinlock_t kzdev_list_lock; + struct kvm_s390_mmu_cache *mc; }; #define 
KVM_HVA_ERR_BAD (-1UL) diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 48e548c01daa..bd1ef5e2d2eb 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -30,11 +30,7 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.gmap_asce = 0; mm->context.flush_mm = 0; #if IS_ENABLED(CONFIG_KVM) - mm->context.has_pgste = 0; - mm->context.uses_skeys = 0; - mm->context.uses_cmm = 0; mm->context.allow_cow_sharing = 1; - mm->context.allow_gmap_hpage_1m = 0; #endif switch (mm->context.asce_limit) { default: diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index 1e50f6f1ad9d..7354b42ee994 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -36,7 +36,6 @@ static inline bool __tlb_remove_folio_pages(struct mmu_gather *tlb, #include #include -#include /* * Release the page cache reference for a pte removed by @@ -85,8 +84,6 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, tlb->mm->context.flush_mm = 1; tlb->freed_tables = 1; tlb->cleared_pmds = 1; - if (mm_has_pgste(tlb->mm)) - gmap_unlink(tlb->mm, (unsigned long *)pte, address); tlb_remove_ptdesc(tlb, virt_to_ptdesc(pte)); } diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index c5e02addcd67..dff035372601 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -471,65 +471,15 @@ do { \ #define arch_get_kernel_nofault __mvc_kernel_nofault #define arch_put_kernel_nofault __mvc_kernel_nofault -void __cmpxchg_user_key_called_with_bad_pointer(void); - -int __cmpxchg_user_key1(unsigned long address, unsigned char *uval, - unsigned char old, unsigned char new, unsigned long key); -int __cmpxchg_user_key2(unsigned long address, unsigned short *uval, - unsigned short old, unsigned short new, unsigned long key); -int __cmpxchg_user_key4(unsigned long address, unsigned int *uval, - unsigned int old, unsigned 
int new, unsigned long key); -int __cmpxchg_user_key8(unsigned long address, unsigned long *uval, - unsigned long old, unsigned long new, unsigned long key); -int __cmpxchg_user_key16(unsigned long address, __uint128_t *uval, - __uint128_t old, __uint128_t new, unsigned long key); - -static __always_inline int _cmpxchg_user_key(unsigned long address, void *uval, - __uint128_t old, __uint128_t new, - unsigned long key, int size) -{ - switch (size) { - case 1: return __cmpxchg_user_key1(address, uval, old, new, key); - case 2: return __cmpxchg_user_key2(address, uval, old, new, key); - case 4: return __cmpxchg_user_key4(address, uval, old, new, key); - case 8: return __cmpxchg_user_key8(address, uval, old, new, key); - case 16: return __cmpxchg_user_key16(address, uval, old, new, key); - default: __cmpxchg_user_key_called_with_bad_pointer(); - } - return 0; -} - -/** - * cmpxchg_user_key() - cmpxchg with user space target, honoring storage keys - * @ptr: User space address of value to compare to @old and exchange with - * @new. Must be aligned to sizeof(*@ptr). - * @uval: Address where the old value of *@ptr is written to. - * @old: Old value. Compared to the content pointed to by @ptr in order to - * determine if the exchange occurs. The old value read from *@ptr is - * written to *@uval. - * @new: New value to place at *@ptr. - * @key: Access key to use for checking storage key protection. - * - * Perform a cmpxchg on a user space target, honoring storage key protection. - * @key alone determines how key checking is performed, neither - * storage-protection-override nor fetch-protection-override apply. - * The caller must compare *@uval and @old to determine if values have been - * exchanged. In case of an exception *@uval is set to zero. 
- * - * Return: 0: cmpxchg executed - * -EFAULT: an exception happened when trying to access *@ptr - * -EAGAIN: maxed out number of retries (byte and short only) - */ -#define cmpxchg_user_key(ptr, uval, old, new, key) \ -({ \ - __typeof__(ptr) __ptr = (ptr); \ - __typeof__(uval) __uval = (uval); \ - \ - BUILD_BUG_ON(sizeof(*(__ptr)) != sizeof(*(__uval))); \ - might_fault(); \ - __chk_user_ptr(__ptr); \ - _cmpxchg_user_key((unsigned long)(__ptr), (void *)(__uval), \ - (old), (new), (key), sizeof(*(__ptr))); \ -}) +int __cmpxchg_key1(void *address, unsigned char *uval, unsigned char old, + unsigned char new, unsigned long key); +int __cmpxchg_key2(void *address, unsigned short *uval, unsigned short old, + unsigned short new, unsigned long key); +int __cmpxchg_key4(void *address, unsigned int *uval, unsigned int old, + unsigned int new, unsigned long key); +int __cmpxchg_key8(void *address, unsigned long *uval, unsigned long old, + unsigned long new, unsigned long key); +int __cmpxchg_key16(void *address, __uint128_t *uval, __uint128_t old, + __uint128_t new, unsigned long key); #endif /* __S390_UACCESS_H */ diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h index 0744874ca6df..d919e69662f5 100644 --- a/arch/s390/include/asm/uv.h +++ b/arch/s390/include/asm/uv.h @@ -631,7 +631,6 @@ int uv_pin_shared(unsigned long paddr); int uv_destroy_folio(struct folio *folio); int uv_destroy_pte(pte_t pte); int uv_convert_from_secure_pte(pte_t pte); -int make_hva_secure(struct mm_struct *mm, unsigned long hva, struct uv_cb_header *uvcb); int s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio); int __make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb); int uv_convert_from_secure(unsigned long paddr); diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index cb4e8089fbca..a284f98d9716 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -209,39 +209,6 @@ int uv_convert_from_secure_pte(pte_t pte) return 
uv_convert_from_secure_folio(pfn_folio(pte_pfn(pte))); } -/** - * should_export_before_import - Determine whether an export is needed - * before an import-like operation - * @uvcb: the Ultravisor control block of the UVC to be performed - * @mm: the mm of the process - * - * Returns whether an export is needed before every import-like operation. - * This is needed for shared pages, which don't trigger a secure storage - * exception when accessed from a different guest. - * - * Although considered as one, the Unpin Page UVC is not an actual import, - * so it is not affected. - * - * No export is needed also when there is only one protected VM, because the - * page cannot belong to the wrong VM in that case (there is no "other VM" - * it can belong to). - * - * Return: true if an export is needed before every import, otherwise false. - */ -static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm) -{ - /* - * The misc feature indicates, among other things, that importing a - * shared page from a different protected VM will automatically also - * transfer its ownership. - */ - if (uv_has_feature(BIT_UV_FEAT_MISC)) - return false; - if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED) - return false; - return atomic_read(&mm->context.protected_count) > 1; -} - /* * Calculate the expected ref_count for a folio that would otherwise have no * further pins. 
This was cribbed from similar functions in other places in @@ -313,20 +280,6 @@ int __make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb) } EXPORT_SYMBOL(__make_folio_secure); -static int make_folio_secure(struct mm_struct *mm, struct folio *folio, struct uv_cb_header *uvcb) -{ - int rc; - - if (!folio_trylock(folio)) - return -EAGAIN; - if (should_export_before_import(uvcb, mm)) - uv_convert_from_secure(folio_to_phys(folio)); - rc = __make_folio_secure(folio, uvcb); - folio_unlock(folio); - - return rc; -} - /** * s390_wiggle_split_folio() - try to drain extra references to a folio and * split the folio if it is large. @@ -414,56 +367,6 @@ int s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio) } EXPORT_SYMBOL_GPL(s390_wiggle_split_folio); -int make_hva_secure(struct mm_struct *mm, unsigned long hva, struct uv_cb_header *uvcb) -{ - struct vm_area_struct *vma; - struct folio_walk fw; - struct folio *folio; - int rc; - - mmap_read_lock(mm); - vma = vma_lookup(mm, hva); - if (!vma) { - mmap_read_unlock(mm); - return -EFAULT; - } - folio = folio_walk_start(&fw, vma, hva, 0); - if (!folio) { - mmap_read_unlock(mm); - return -ENXIO; - } - - folio_get(folio); - /* - * Secure pages cannot be huge and userspace should not combine both. - * In case userspace does it anyway this will result in an -EFAULT for - * the unpack. The guest is thus never reaching secure mode. - * If userspace plays dirty tricks and decides to map huge pages at a - * later point in time, it will receive a segmentation fault or - * KVM_RUN will return -EFAULT. 
- */ - if (folio_test_hugetlb(folio)) - rc = -EFAULT; - else if (folio_test_large(folio)) - rc = -E2BIG; - else if (!pte_write(fw.pte) || (pte_val(fw.pte) & _PAGE_INVALID)) - rc = -ENXIO; - else - rc = make_folio_secure(mm, folio, uvcb); - folio_walk_end(&fw, vma); - mmap_read_unlock(mm); - - if (rc == -E2BIG || rc == -EBUSY) { - rc = s390_wiggle_split_folio(mm, folio); - if (!rc) - rc = -EAGAIN; - } - folio_put(folio); - - return rc; -} -EXPORT_SYMBOL_GPL(make_hva_secure); - /* * To be called with the folio locked or with an extra reference! This will * prevent kvm_s390_pv_make_secure() from touching the folio concurrently. @@ -474,21 +377,18 @@ int arch_make_folio_accessible(struct folio *folio) { int rc = 0; - /* Large folios cannot be secure */ - if (unlikely(folio_test_large(folio))) - return 0; - /* - * PG_arch_1 is used in 2 places: - * 1. for storage keys of hugetlb folios and KVM - * 2. As an indication that this small folio might be secure. This can - * overindicate, e.g. we set the bit before calling - * convert_to_secure. - * As secure pages are never large folios, both variants can co-exists. + * PG_arch_1 is used as an indication that this small folio might be + * secure. This can overindicate, e.g. we set the bit before calling + * convert_to_secure. */ if (!test_bit(PG_arch_1, &folio->flags.f)) return 0; + /* Large folios cannot be secure. 
*/ + if (WARN_ON_ONCE(folio_test_large(folio))) + return -EFAULT; + rc = uv_pin_shared(folio_to_phys(folio)); if (!rc) { clear_bit(PG_arch_1, &folio->flags.f); diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile index 1e2dcd3e2436..dac9d53b23d8 100644 --- a/arch/s390/kvm/Makefile +++ b/arch/s390/kvm/Makefile @@ -8,7 +8,7 @@ include $(srctree)/virt/kvm/Makefile.kvm ccflags-y := -Ivirt/kvm -Iarch/s390/kvm kvm-y += kvm-s390.o intercept.o interrupt.o priv.o sigp.o -kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o gmap-vsie.o +kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o kvm-y += dat.o gmap.o faultin.o kvm-$(CONFIG_VFIO_PCI_ZDEV_KVM) += pci.o diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 53233dec8cad..d89d1c381522 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -10,13 +10,13 @@ #include #include -#include #include #include #include "kvm-s390.h" #include "trace.h" #include "trace-s390.h" #include "gaccess.h" +#include "gmap.h" static void do_discard_gfn_range(struct kvm_vcpu *vcpu, gfn_t gfn_start, gfn_t gfn_end) { diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 2649365bf054..67de47a81a87 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -11,15 +11,43 @@ #include #include #include +#include +#include +#include #include #include -#include #include #include "kvm-s390.h" +#include "dat.h" +#include "gmap.h" #include "gaccess.h" +#include "faultin.h" #define GMAP_SHADOW_FAKE_TABLE 1ULL +union dat_table_entry { + unsigned long val; + union region1_table_entry pgd; + union region2_table_entry p4d; + union region3_table_entry pud; + union segment_table_entry pmd; + union page_table_entry pte; +}; + +#define WALK_N_ENTRIES 7 +#define LEVEL_MEM -2 +struct pgtwalk { + struct guest_fault raw_entries[WALK_N_ENTRIES]; + gpa_t last_addr; + int level; + bool p; +}; + +static inline struct guest_fault *get_entries(struct pgtwalk *w) +{ + return w->raw_entries - LEVEL_MEM; +} + /* * raddress union 
which will contain the result (real or absolute address) * after a page table walk. The rfaa, sfaa and pfra members are used to @@ -81,6 +109,28 @@ struct aste { /* .. more fields there */ }; +union oac { + unsigned int val; + struct { + struct { + unsigned short key : 4; + unsigned short : 4; + unsigned short as : 2; + unsigned short : 4; + unsigned short k : 1; + unsigned short a : 1; + } oac1; + struct { + unsigned short key : 4; + unsigned short : 4; + unsigned short as : 2; + unsigned short : 4; + unsigned short k : 1; + unsigned short a : 1; + } oac2; + }; +}; + int ipte_lock_held(struct kvm *kvm) { if (sclp.has_siif) @@ -603,28 +653,16 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu, static int vm_check_access_key_gpa(struct kvm *kvm, u8 access_key, enum gacc_mode mode, gpa_t gpa) { - u8 storage_key, access_control; - bool fetch_protected; - unsigned long hva; + union skey storage_key; int r; - if (access_key == 0) - return 0; - - hva = gfn_to_hva(kvm, gpa_to_gfn(gpa)); - if (kvm_is_error_hva(hva)) - return PGM_ADDRESSING; - - mmap_read_lock(current->mm); - r = get_guest_storage_key(current->mm, hva, &storage_key); - mmap_read_unlock(current->mm); + scoped_guard(read_lock, &kvm->mmu_lock) + r = dat_get_storage_key(kvm->arch.gmap->asce, gpa_to_gfn(gpa), &storage_key); if (r) return r; - access_control = FIELD_GET(_PAGE_ACC_BITS, storage_key); - if (access_control == access_key) + if (access_key == 0 || storage_key.acc == access_key) return 0; - fetch_protected = storage_key & _PAGE_FP_BIT; - if ((mode == GACC_FETCH || mode == GACC_IFETCH) && !fetch_protected) + if ((mode == GACC_FETCH || mode == GACC_IFETCH) && !storage_key.fp) return 0; return PGM_PROTECTION; } @@ -667,8 +705,7 @@ static int vcpu_check_access_key_gpa(struct kvm_vcpu *vcpu, u8 access_key, enum gacc_mode mode, union asce asce, gpa_t gpa, unsigned long ga, unsigned int len) { - u8 storage_key, access_control; - unsigned long hva; + union skey storage_key; int r; /* access key 
0 matches any storage key -> allow */ @@ -678,26 +715,23 @@ static int vcpu_check_access_key_gpa(struct kvm_vcpu *vcpu, u8 access_key, * caller needs to ensure that gfn is accessible, so we can * assume that this cannot fail */ - hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gpa)); - mmap_read_lock(current->mm); - r = get_guest_storage_key(current->mm, hva, &storage_key); - mmap_read_unlock(current->mm); + scoped_guard(read_lock, &vcpu->kvm->mmu_lock) + r = dat_get_storage_key(vcpu->arch.gmap->asce, gpa_to_gfn(gpa), &storage_key); if (r) return r; - access_control = FIELD_GET(_PAGE_ACC_BITS, storage_key); /* access key matches storage key -> allow */ - if (access_control == access_key) + if (storage_key.acc == access_key) return 0; if (mode == GACC_FETCH || mode == GACC_IFETCH) { /* it is a fetch and fetch protection is off -> allow */ - if (!(storage_key & _PAGE_FP_BIT)) + if (!storage_key.fp) return 0; if (fetch_prot_override_applicable(vcpu, mode, asce) && fetch_prot_override_applies(ga, len)) return 0; } if (storage_prot_override_applicable(vcpu) && - storage_prot_override_applies(access_control)) + storage_prot_override_applies(storage_key.acc)) return 0; return PGM_PROTECTION; } @@ -797,37 +831,79 @@ static int access_guest_page_gpa(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa return rc; } +static int mvcos_key(void *to, const void *from, unsigned long size, u8 dst_key, u8 src_key) +{ + union oac spec = { + .oac1.key = dst_key, + .oac1.k = !!dst_key, + .oac2.key = src_key, + .oac2.k = !!src_key, + }; + int exception = PGM_PROTECTION; + + asm_inline volatile( + " lr %%r0,%[spec]\n" + "0: mvcos %[to],%[from],%[size]\n" + "1: lhi %[exc],0\n" + "2:\n" + EX_TABLE(0b, 2b) + EX_TABLE(1b, 2b) + : [size] "+d" (size), [to] "=Q" (*(char *)to), [exc] "+d" (exception) + : [spec] "d" (spec.val), [from] "Q" (*(const char *)from) + : "memory", "cc", "0"); + return exception; +} + +struct acc_page_key_context { + void *data; + int exception; + unsigned short offset; + unsigned 
short len; + bool store; + u8 access_key; +}; + +static void _access_guest_page_with_key_gpa(struct guest_fault *f) +{ + struct acc_page_key_context *context = f->priv; + void *ptr; + int r; + + ptr = __va(PFN_PHYS(f->pfn) | context->offset); + + if (context->store) + r = mvcos_key(ptr, context->data, context->len, context->access_key, 0); + else + r = mvcos_key(context->data, ptr, context->len, 0, context->access_key); + + context->exception = r; +} + static int access_guest_page_with_key_gpa(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa, - void *data, unsigned int len, u8 access_key) + void *data, unsigned int len, u8 acc) { - struct kvm_memory_slot *slot; - bool writable; - gfn_t gfn; - hva_t hva; + struct acc_page_key_context context = { + .offset = offset_in_page(gpa), + .len = len, + .data = data, + .access_key = acc, + .store = mode == GACC_STORE, + }; + struct guest_fault fault = { + .gfn = gpa_to_gfn(gpa), + .priv = &context, + .write_attempt = mode == GACC_STORE,
[GIT PULL v1 25/36] KVM: s390: Switch to new gmap — Claudio Imbrenda — pbonzini@redhat.com