I want to track the cache request number in the last level cache. I wrote a Linux module to get this information based on a tutorial here .
It can compile and run, but the output is always 0. In other words, when I use it rdmsr, it always gives me edx = 0, eax = 0. I even tried the demo code in the tutorial , the output is still 0.
I got stuck in this problem for a whole week. Can someone help me point out the error that I made in the program?
I knew that there are some existing programs that do the same, but I need to know how to write the code myself, because I want to control the cache request in the Xen hypervisor. I cannot use these tools in Xen unless I include the tools in the Xen hypervisor, which seems more useful.
#include <linux/module.h> /* Needed by all modules */
#include <linux/kernel.h> /* Needed for KERN_INFO */
#define PERFEVTSEL0 0x186
#define PERFEVTSEL1 0x187
#define PERFEVTSEL2 0x188
#define PERFEVTSEL3 0x189
#define PMC0 0xc1
#define PMC1 0xc2
#define PMC2 0xc2
#define PMC3 0xc3
#define L1I_ALLHIT_EVENT 0x80
#define L1I_ALLHIT_MASK 0x01
#define L1I_ALLMISS_EVENT 0x80
#define L1I_ALLMISS_MASK 0x02
#define L1D_ALLREQ_EVENT 0x43
#define L1D_ALLREQ_MASK 0x01
#define L1D_LDMISS_EVENT 0x40
#define L1D_LDMISS_MASK 0x01
#define L1D_STMISS_EVENT 0x28
#define L1D_STMISS_MASK 0x01
#define L2_ALLREQ_EVENT 0x24
#define L2_ALLREQ_MASK L2_ALLCODEREQ_MASK
#define L2_ALLMISS_EVENT 0x24
#define L2_ALLMISS_MASK L2_ALLCODEMISS_MASK
#define L2_ALLCODEREQ_MASK 0x30
#define L2_ALLCODEMISS_MASK 0x20
#define L3_ALLREQ_EVENT 0x2E
#define L3_ALLREQ_MASK 0x4F
#define L3_ALLMISS_EVENT 0x2E
#define L3_ALLMISS_MASK 0x41
#define USR_BIT (0x01UL << 16)
#define OS_BIT (0x01UL << 17)
#define SET_MSR_USR_BIT(eax) eax |= USR_BIT
#define CLEAR_MSR_USR_BIT(exa) eax &= (~USR_BIT)
#define SET_MSR_OS_BIT(eax) eax |= OS_BIT
#define CLEAR_MSR_OS_BIT(eax) eax &= (~OS_BIT)
#define SET_EVENT_MASK(eax, event, umask) eax |= (event | (umask << 8))
#define MSR_ENFLAG (0x1<<22)
static inline void rtxen_write_msr(uint32_t eax, uint32_t ecx)
{
__asm__ __volatile__ ("movl %0, %%ecx\n\t"
"xorl %%edx, %%edx\n\t"
"xorl %%eax, %%eax\n\t"
"wrmsr\n\t"
:
: "m" (ecx)
: "eax", "ecx", "edx" );
eax |= MSR_ENFLAG;
__asm__("movl %0, %%ecx\n\t"
"xorl %%edx, %%edx\n\t"
"movl %1, %%eax\n\t"
"wrmsr\n\t"
:
: "m" (ecx), "m" (eax)
: "eax", "ecx", "edx" );
}
static inline void rtxen_read_msr(uint32_t* ecx, uint32_t *eax, uint32_t* edx)
{ __asm__ __volatile__(\
"rdmsr"\
:"=d" (*edx), "=a" (*eax)\
:"c"(*ecx)
);
}
static inline void delay(void )
{
char tmp[1000];
int i;
for( i = 0; i < 1000; i++ )
{
tmp[i] = i * 2;
}
}
enum cache_level
{
UOPS,
L1I,
L1D,
L2,
L3
};
int init_module(void)
{
enum cache_level op;
uint32_t eax, edx, ecx;
uint64_t l3_all;
op = UOPS;
switch(op)
{
case UOPS:
eax = 0x0001010E;
eax |= MSR_ENFLAG;
ecx = 0x187;
printk(KERN_INFO "UOPS Demo: write_msr: eax=%#010x, ecx=%#010x\n", eax, ecx);
rtxen_write_msr(eax, ecx);
ecx = 0xc2;
eax = 1;
edx = 2;
rtxen_read_msr(&ecx, &eax, &edx);
printk(KERN_INFO "UOPS Demo: read_msr: edx=%#010x, eax=%#010x\n", edx, eax);
break;
case L3:
eax = 0;
SET_MSR_USR_BIT(eax);
SET_MSR_OS_BIT(eax);
SET_EVENT_MASK(eax, L3_ALLREQ_EVENT, L3_ALLREQ_MASK);
eax |= MSR_ENFLAG;
ecx = PERFEVTSEL2;
printk(KERN_INFO "before wrmsr: eax=%#010x, ecx=%#010x\n", eax, ecx);
rtxen_write_msr(eax, ecx);
printk(KERN_INFO "after wrmsr: eax=%#010x, ecx=%#010x\n", eax, ecx);
printk(KERN_INFO "L3 all request set MSR PMC2\n");
printk(KERN_INFO "delay by access an array\n");
delay();
ecx = PMC2;
eax = 1;
edx = 2;
printk(KERN_INFO "rdmsr: ecx=%#010x\n", ecx);
rtxen_read_msr(&ecx, &eax, &edx);
l3_all = ( ((uint64_t) edx << 32) | eax );
printk(KERN_INFO "rdmsr: L3 all request is %llu (%#010lx)\n", l3_all, (unsigned long)l3_all);
break;
default:
printk(KERN_INFO "operation not implemented yet\n");
}
return 0;
}
void cleanup_module(void)
{
printk(KERN_INFO "Goodbye world 1.\n");
}
Result:
[ 1780.946584] UOPS Demo: write_msr: eax=0x0001010e, ecx=0x00000187
[ 1780.946590] UOPS Demo: read_msr: edx=0x00000000, eax=0x00000000
[ 1818.595055] Goodbye world 1.
[ 1821.153947] UOPS Demo: write_msr: eax=0x0041010e, ecx=0x00000187
[ 1821.153950] UOPS Demo: read_msr: edx=0x00000000, eax=0x00000000