Home | History | Annotate | Line # | Download | only in arch-x86
      1 /******************************************************************************
      2  * arch-x86/mca.h
      3  *
      4  * Contributed by Advanced Micro Devices, Inc.
      5  * Author: Christoph Egger <Christoph.Egger (at) amd.com>
      6  *
      7  * Guest OS machine check interface to x86 Xen.
      8  *
      9  * Permission is hereby granted, free of charge, to any person obtaining a copy
     10  * of this software and associated documentation files (the "Software"), to
     11  * deal in the Software without restriction, including without limitation the
     12  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
     13  * sell copies of the Software, and to permit persons to whom the Software is
     14  * furnished to do so, subject to the following conditions:
     15  *
     16  * The above copyright notice and this permission notice shall be included in
     17  * all copies or substantial portions of the Software.
     18  *
     19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     20  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     21  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     22  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     23  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     24  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     25  * DEALINGS IN THE SOFTWARE.
     26  */
     27 
     28 /* Full MCA functionality has the following Usecases from the guest side:
     29  *
     30  * Must have's:
     31  * 1. Dom0 and DomU register machine check trap callback handlers
     32  *    (already done via "set_trap_table" hypercall)
     33  * 2. Dom0 registers machine check event callback handler
     34  *    (doable via EVTCHNOP_bind_virq)
     35  * 3. Dom0 and DomU fetches machine check data
     36  * 4. Dom0 wants Xen to notify a DomU
     37  * 5. Dom0 gets DomU ID from physical address
     38  * 6. Dom0 wants Xen to kill DomU (already done for "xm destroy")
     39  *
     40  * Nice to have's:
     41  * 7. Dom0 wants Xen to deactivate a physical CPU
     42  *    This is better done as separate task, physical CPU hotplugging,
     43  *    and hypercall(s) should be sysctl's
     44  * 8. Page migration proposed from Xen NUMA work, where Dom0 can tell Xen to
     45  *    move a DomU (or Dom0 itself) away from a malicious page
     46  *    producing correctable errors.
 * 9. Offlining a physical page:
 *    Xen frees and never re-uses a certain physical page.
 * 10. Test facility: Allow Dom0 to write values into machine check MSRs
 *     and tell Xen to trigger a machine check
     51  */
     52 
     53 #ifndef __XEN_PUBLIC_ARCH_X86_MCA_H__
     54 #define __XEN_PUBLIC_ARCH_X86_MCA_H__
     55 
/* The MCA hypercall is issued through the __HYPERVISOR_arch_0 slot. */
#define __HYPERVISOR_mca __HYPERVISOR_arch_0

/*
 * Interface version.
 *
 * The xen-unstable repo uses interface version 0x03000001.  Our interface
 * is incompatible with that one and with any future minor revision of it,
 * so a different version number range is used here, chosen to be
 * numerically less than the one used in xen-unstable.
 */
#define XEN_MCA_INTERFACE_VERSION 0x01ecc003

/*
 * Request flags (IN):
 *   XEN_MC_NONURGENT - Dom0 calls the hypercall to retrieve nonurgent
 *                      telemetry.
 *   XEN_MC_URGENT    - Dom0/DomU calls the hypercall to retrieve urgent
 *                      telemetry.
 *   XEN_MC_ACK       - Dom0 acknowledges previously-fetched telemetry.
 */
#define XEN_MC_NONURGENT  0x0001
#define XEN_MC_URGENT     0x0002
#define XEN_MC_ACK        0x0004

/*
 * Result flags (OUT):
 *   XEN_MC_OK          - All is ok.
 *   XEN_MC_FETCHFAILED - Domain could not fetch data.
 *   XEN_MC_NODATA      - There was no machine check data to fetch.
 *   XEN_MC_NOMATCH     - Between notification time and this hypercall
 *                        another (most likely correctable) error happened;
 *                        the fetched data does not match the original
 *                        machine check data.
 */
#define XEN_MC_OK           0x0
#define XEN_MC_FETCHFAILED  0x1
#define XEN_MC_NODATA       0x2
#define XEN_MC_NOMATCH      0x4

/*
 * Notification results (OUT).  These two are mutually exclusive:
 *   XEN_MC_CANNOTHANDLE - DomU did not register an MC NMI handler.
 *                         Try something else.
 *   XEN_MC_NOTDELIVERED - Notifying DomU failed.  Retry later or try
 *                         something else.
 */
#define XEN_MC_CANNOTHANDLE 0x8
#define XEN_MC_NOTDELIVERED 0x10

/* Marks an invalid vcpu id; applicable to all mc_vcpuid fields below. */
#define XEN_MC_VCPUID_INVALID 0xffff
     93 
     94 #ifndef __ASSEMBLY__
     95 
/* G. (DOM0) Machine Check Architecture: delivered on VIRQ_ARCH_0. */
#define VIRQ_MCA VIRQ_ARCH_0

/*
 * Machine Check Architecture
 *
 * The structs below are read-only and are used to report all kinds of
 * correctable and uncorrectable errors detected by the hardware.
 *
 *   - Dom0 and DomU: register a handler to get notified.
 *   - Dom0 only:     correctable errors are reported via VIRQ_MCA.
 *   - Dom0 and DomU: uncorrectable errors are reported via NMI handlers.
 *
 * MC_TYPE_* are the structure type identifiers; each one shares its
 * suffix with one of the mcinfo_* structs below.
 */
#define MC_TYPE_GLOBAL          0
#define MC_TYPE_BANK            1
#define MC_TYPE_EXTENDED        2
#define MC_TYPE_RECOVERY        3
    110 
/*
 * Common header embedded as the first member of every mcinfo_* record
 * in this file.
 */
struct mcinfo_common {
    /* Structure type. */
    uint16_t type;
    /* Size of this struct in bytes. */
    uint16_t size;
};
    115 
    116 
/*
 * Machine check flag bits, one bit per flag.
 * NOTE(review): presumably the values stored in mcinfo_global.mc_flags -
 * confirm against the hypervisor side.
 */
#define MC_FLAG_CORRECTABLE     (1 << 0)
#define MC_FLAG_UNCORRECTABLE   (1 << 1)
#define MC_FLAG_RECOVERABLE     (1 << 2)
#define MC_FLAG_POLLED          (1 << 3)
#define MC_FLAG_RESET           (1 << 4)
#define MC_FLAG_CMCI            (1 << 5)
#define MC_FLAG_MCE             (1 << 6)
    124 /* contains global x86 mc information */
    125 struct mcinfo_global {
    126     struct mcinfo_common common;
    127 
    128     /* running domain at the time in error (most likely the impacted one) */
    129     uint16_t mc_domid;
    130     uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */
    131     uint32_t mc_socketid; /* physical socket of the physical core */
    132     uint16_t mc_coreid; /* physical impacted core */
    133     uint16_t mc_core_threadid; /* core thread of physical core */
    134     uint32_t mc_apicid;
    135     uint32_t mc_flags;
    136     uint64_t mc_gstatus; /* global status */
    137 };
    138 
    139 /* contains bank local x86 mc information */
    140 struct mcinfo_bank {
    141     struct mcinfo_common common;
    142 
    143     uint16_t mc_bank; /* bank nr */
    144     uint16_t mc_domid; /* Usecase 5: domain referenced by mc_addr on dom0
    145                         * and if mc_addr is valid. Never valid on DomU. */
    146     uint64_t mc_status; /* bank status */
    147     uint64_t mc_addr;   /* bank address, only valid
    148                          * if addr bit is set in mc_status */
    149     uint64_t mc_misc;
    150     uint64_t mc_ctrl2;
    151     uint64_t mc_tsc;
    152 };
    153 
    154 
/* One MSR together with its value. */
struct mcinfo_msr {
    /* MSR */
    uint64_t reg;
    /* MSR value */
    uint64_t value;
};
    159 
    160 /* contains mc information from other
    161  * or additional mc MSRs */
    162 struct mcinfo_extended {
    163     struct mcinfo_common common;
    164 
    165     /* You can fill up to five registers.
    166      * If you need more, then use this structure
    167      * multiple times. */
    168 
    169     uint32_t mc_msrs; /* Number of msr with valid values. */
    170     /*
    171      * Currently Intel extended MSR (32/64) include all gp registers
    172      * and E(R)FLAGS, E(R)IP, E(R)MISC, up to 11/19 of them might be
    173      * useful at present. So expand this array to 16/32 to leave room.
    174      */
    175     struct mcinfo_msr mc_msr[sizeof(void *) * 4];
    176 };
    177 
/*
 * Recovery action flags: give recovery result information to DOM0.
 *
 *   REC_ACTION_RECOVERED  - Xen took a successful recovery action; the
 *                           error is recovered.
 *   REC_ACTION_NONE       - No action was performed by Xen.
 *   REC_ACTION_NEED_RESET - It is possible that DOM0 might take action
 *                           ownership in some cases.
 */
#define REC_ACTION_RECOVERED (0x1 << 0)
#define REC_ACTION_NONE (0x1 << 1)
#define REC_ACTION_NEED_RESET (0x1 << 2)

/*
 * Recovery action types.  If the action is performed successfully, the
 * REC_ACTION_RECOVERED flag will be returned.
 *
 *   MC_ACTION_PAGE_OFFLINE - page offline action
 *   MC_ACTION_CPU_OFFLINE  - CPU offline action
 *   MC_ACTION_CACHE_SHRINK - L3 cache disable action
 */
#define MC_ACTION_PAGE_OFFLINE (0x1 << 0)
#define MC_ACTION_CPU_OFFLINE (0x1 << 1)
#define MC_ACTION_CACHE_SHRINK (0x1 << 2)
    197 
/*
 * The interface below is used between XEN and DOM0 to pass XEN's
 * recovery action information to DOM0.
 *
 * Usage scenario: after offlining a broken page, XEN might pass its page
 * offline recovery action result to DOM0.  DOM0 will save the information
 * in non-volatile memory for further proactive actions, such as offlining
 * the easily broken page earlier on the next reboot.
 */
struct page_offline_action
{
    /* Params for passing the offlined page number to DOM0. */
    uint64_t mfn;
    uint64_t status;
};
    211 
/* Recovery action information describing a CPU that was offlined. */
struct cpu_offline_action
{
    /*
     * Params for passing the identity of the offlined CPU to DOM0:
     * socket, core and core thread.
     */
    uint32_t mc_socketid;
    uint16_t mc_coreid;
    uint16_t mc_core_threadid;
};
    219 
    220 #define MAX_UNION_SIZE 16
    221 struct mcinfo_recovery
    222 {
    223     struct mcinfo_common common;
    224     uint16_t mc_bank; /* bank nr */
    225     uint8_t action_flags;
    226     uint8_t action_types;
    227     union {
    228         struct page_offline_action page_retire;
    229         struct cpu_offline_action cpu_offline;
    230         uint8_t pad[MAX_UNION_SIZE];
    231     } action_info;
    232 };
    233 
    234 
#define MCINFO_HYPERCALLSIZE    1024
#define MCINFO_MAXSIZE          768

/* Flag bit for mc_info.flags. */
#define MCINFO_FLAGS_UNCOMPLETE 0x1

/*
 * Container for mcinfo_* entries.
 *
 * mi_data has (MCINFO_MAXSIZE - 1) / 8 == 95 uint64_t slots, i.e. 760
 * bytes; together with the two uint32_t header fields that is 768 bytes.
 */
struct mc_info {
    /* Number of mcinfo_* entries in mi_data. */
    uint32_t mi_nentries;
    /* NOTE(review): presumably MCINFO_FLAGS_* - confirm. */
    uint32_t flags;
    uint64_t mi_data[(MCINFO_MAXSIZE - 1) / 8];
};
typedef struct mc_info mc_info_t;
DEFINE_XEN_GUEST_HANDLE(mc_info_t);
    247 
/* Number of entries in mcinfo_logical_cpu.mc_msrvalues[]. */
#define __MC_MSR_ARRAYSIZE 8
#define __MC_NMSRS 1

/* 7 CPU feature flag words; sizes mcinfo_logical_cpu.mc_cpu_caps[]. */
#define MC_NCAPS        7

/* Feature flag words and the cpuid level each one comes from. */
#define MC_CAPS_STD_EDX 0       /* cpuid level 0x00000001 (%edx) */
#define MC_CAPS_AMD_EDX 1       /* cpuid level 0x80000001 (%edx) */
#define MC_CAPS_TM      2       /* cpuid level 0x80860001 (TransMeta) */
#define MC_CAPS_LINUX   3       /* Linux-defined */
#define MC_CAPS_STD_ECX 4       /* cpuid level 0x00000001 (%ecx) */
#define MC_CAPS_VIA     5       /* cpuid level 0xc0000001 */
#define MC_CAPS_AMD_ECX 6       /* cpuid level 0x80000001 (%ecx) */
    258 
    259 struct mcinfo_logical_cpu {
    260     uint32_t mc_cpunr;
    261     uint32_t mc_chipid;
    262     uint16_t mc_coreid;
    263     uint16_t mc_threadid;
    264     uint32_t mc_apicid;
    265     uint32_t mc_clusterid;
    266     uint32_t mc_ncores;
    267     uint32_t mc_ncores_active;
    268     uint32_t mc_nthreads;
    269     int32_t mc_cpuid_level;
    270     uint32_t mc_family;
    271     uint32_t mc_vendor;
    272     uint32_t mc_model;
    273     uint32_t mc_step;
    274     char mc_vendorid[16];
    275     char mc_brandid[64];
    276     uint32_t mc_cpu_caps[MC_NCAPS];
    277     uint32_t mc_cache_size;
    278     uint32_t mc_cache_alignment;
    279     int32_t mc_nmsrvals;
    280     struct mcinfo_msr mc_msrvalues[__MC_MSR_ARRAYSIZE];
    281 };
    282 typedef struct mcinfo_logical_cpu xen_mc_logical_cpu_t;
    283 DEFINE_XEN_GUEST_HANDLE(xen_mc_logical_cpu_t);
    284 
    285 
/*
 * OS's should use these instead of writing their own lookup function
 * each with its own bugs and drawbacks.
 * We use macros instead of static inline functions to allow guests
 * to include this header in assembly files (*.S).
 *
 * Because these are macros, pointer arguments may be evaluated more than
 * once: pass expressions without side effects.
 */

/* Prototype:
 *    uint32_t x86_mcinfo_nentries(struct mc_info *mi);
 *
 * Yields the mi_nentries field of mi.
 */
#define x86_mcinfo_nentries(_mi)    \
    ((_mi)->mi_nentries)

/* Prototype:
 *    struct mcinfo_common *x86_mcinfo_first(struct mc_info *mi);
 *
 * Yields the start of mi->mi_data viewed as a struct mcinfo_common.
 */
#define x86_mcinfo_first(_mi)       \
    ((struct mcinfo_common *)(_mi)->mi_data)

/* Prototype:
 *    struct mcinfo_common *x86_mcinfo_next(struct mcinfo_common *mic);
 *
 * Yields the address mic->size bytes past mic.  No bounds checking is
 * done here.
 */
#define x86_mcinfo_next(_mic)       \
    ((struct mcinfo_common *)((uint8_t *)(_mic) + (_mic)->size))

/* Prototype:
 *    void x86_mcinfo_lookup(void *ret, struct mc_info *mi, uint16_t type);
 *
 * Sets ret to the first of the x86_mcinfo_nentries(mi) entries whose
 * type field equals type, or to NULL when mi is NULL or no entry matches.
 *
 * The macro's own locals carry a _mcl_ prefix so that caller expressions
 * such as "i" or "found" passed as arguments are not captured by them.
 */
#define x86_mcinfo_lookup(_ret, _mi, _type)                     \
    do {                                                        \
        uint32_t _mcl_found = 0;                                \
        uint32_t _mcl_idx;                                      \
        struct mcinfo_common *_mcl_cur;                         \
                                                                \
        (_ret) = NULL;                                          \
        if ((_mi) == NULL)                                      \
            break;                                              \
        _mcl_cur = x86_mcinfo_first(_mi);                       \
        for (_mcl_idx = 0;                                      \
             _mcl_idx < x86_mcinfo_nentries(_mi);               \
             _mcl_idx++) {                                      \
            if (_mcl_cur->type == (_type)) {                    \
                _mcl_found = 1;                                 \
                break;                                          \
            }                                                   \
            _mcl_cur = x86_mcinfo_next(_mcl_cur);               \
        }                                                       \
        (_ret) = _mcl_found ? _mcl_cur : NULL;                  \
    } while (0)
    329 
    330 
    331 /* Usecase 1
    332  * Register machine check trap callback handler
    333  *    (already done via "set_trap_table" hypercall)
    334  */
    335 
    336 /* Usecase 2
    337  * Dom0 registers machine check event callback handler
    338  * done by EVTCHNOP_bind_virq
    339  */
    340 
    341 /* Usecase 3
    342  * Fetch machine check data from hypervisor.
    343  * Note, this hypercall is special, because both Dom0 and DomU must use this.
    344  */
    345 #define XEN_MC_fetch            1
    346 struct xen_mc_fetch {
    347     /* IN/OUT variables. */
    348     uint32_t flags;	/* IN: XEN_MC_NONURGENT, XEN_MC_URGENT,
    349                            XEN_MC_ACK if ack'ing an earlier fetch */
    350                        /* OUT: XEN_MC_OK, XEN_MC_FETCHFAILED,
    351                           XEN_MC_NODATA, XEN_MC_NOMATCH */
    352     uint32_t _pad0;
    353     uint64_t fetch_id;	/* OUT: id for ack, IN: id we are ack'ing */
    354 
    355     /* OUT variables. */
    356     XEN_GUEST_HANDLE(mc_info_t) data;
    357 };
    358 typedef struct xen_mc_fetch xen_mc_fetch_t;
    359 DEFINE_XEN_GUEST_HANDLE(xen_mc_fetch_t);
    360 
    361 
/* Usecase 4
 * Tell the hypervisor to notify a DomU about the machine check error.
 */
#define XEN_MC_notifydomain     2
struct xen_mc_notifydomain {
    /* IN variables. */

    /* The unprivileged domain to notify. */
    uint16_t mc_domid;
    /*
     * The vcpu in mc_domid to notify.
     * Usually the value echoed from the fetch hypercall.
     */
    uint16_t mc_vcpuid;

    /*
     * IN/OUT variables.
     *
     * flags:
     *   IN:  XEN_MC_CORRECTABLE, XEN_MC_TRAP
     *   OUT: XEN_MC_OK, XEN_MC_CANNOTHANDLE, XEN_MC_NOTDELIVERED,
     *        XEN_MC_NOMATCH
     *
     * NOTE(review): XEN_MC_CORRECTABLE and XEN_MC_TRAP are not defined
     * in this header - confirm where the IN values come from.
     */
    uint32_t flags;
};
typedef struct xen_mc_notifydomain xen_mc_notifydomain_t;
DEFINE_XEN_GUEST_HANDLE(xen_mc_notifydomain_t);
    380 
    381 #define XEN_MC_physcpuinfo 3
    382 struct xen_mc_physcpuinfo {
    383     /* IN/OUT */
    384     uint32_t ncpus;
    385     uint32_t _pad0;
    386     /* OUT */
    387     XEN_GUEST_HANDLE(xen_mc_logical_cpu_t) info;
    388 };
    389 
    390 #define XEN_MC_msrinject    4
    391 #define MC_MSRINJ_MAXMSRS       8
    392 struct xen_mc_msrinject {
    393     /* IN */
    394     uint32_t mcinj_cpunr;           /* target processor id */
    395     uint32_t mcinj_flags;           /* see MC_MSRINJ_F_* below */
    396     uint32_t mcinj_count;           /* 0 .. count-1 in array are valid */
    397     domid_t  mcinj_domid;           /* valid only if MC_MSRINJ_F_GPADDR is
    398                                        present in mcinj_flags */
    399     uint16_t _pad0;
    400     struct mcinfo_msr mcinj_msr[MC_MSRINJ_MAXMSRS];
    401 };
    402 
    403 /* Flags for mcinj_flags above; bits 16-31 are reserved */
    404 #define MC_MSRINJ_F_INTERPOSE   0x1
    405 #define MC_MSRINJ_F_GPADDR      0x2
    406 
/* Sub-command number and argument for machine check injection. */
#define XEN_MC_mceinject    5
struct xen_mc_mceinject {
    /*
     * Target processor id.  Declared with a fixed-width type like every
     * other field of this interface (compare xen_mc_msrinject.mcinj_cpunr);
     * on x86 this is the same type as the previous "unsigned int".
     */
    uint32_t mceinj_cpunr;
};
    411 
#if defined(__XEN__) || defined(__XEN_TOOLS__)
/* Only visible when __XEN__ or __XEN_TOOLS__ is defined. */
#define XEN_MC_inject_v2        6

/* Injection type: the low three bits of xen_mc_inject_v2.flags. */
#define XEN_MC_INJECT_TYPE_MASK     0x7
#define XEN_MC_INJECT_TYPE_MCE      0x0
#define XEN_MC_INJECT_TYPE_CMCI     0x1
#define XEN_MC_INJECT_TYPE_LMCE     0x2

/* Additional flag bit, outside XEN_MC_INJECT_TYPE_MASK. */
#define XEN_MC_INJECT_CPU_BROADCAST 0x8

struct xen_mc_inject_v2 {
    /* XEN_MC_INJECT_* values. */
    uint32_t flags;
    struct xenctl_bitmap cpumap;
};
#endif
    426 
    427 struct xen_mc {
    428     uint32_t cmd;
    429     uint32_t interface_version; /* XEN_MCA_INTERFACE_VERSION */
    430     union {
    431         struct xen_mc_fetch        mc_fetch;
    432         struct xen_mc_notifydomain mc_notifydomain;
    433         struct xen_mc_physcpuinfo  mc_physcpuinfo;
    434         struct xen_mc_msrinject    mc_msrinject;
    435         struct xen_mc_mceinject    mc_mceinject;
    436 #if defined(__XEN__) || defined(__XEN_TOOLS__)
    437         struct xen_mc_inject_v2    mc_inject_v2;
    438 #endif
    439     } u;
    440 };
    441 typedef struct xen_mc xen_mc_t;
    442 DEFINE_XEN_GUEST_HANDLE(xen_mc_t);
    443 
    444 #endif /* __ASSEMBLY__ */
    445 
    446 #endif /* __XEN_PUBLIC_ARCH_X86_MCA_H__ */
    447