path: root/libop/op_hw_specific.h

/*
 * @file architecture-specific interfaces
 * @remark Copyright 2008 Intel Corporation
 * @remark Read the file COPYING
 * @author Andi Kleen
 */

#if defined(__i386__) || defined(__x86_64__) 

/* Assume we run on the same host as the profilee */

#define num_to_mask(x) ((1U << (x)) - 1)
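
/* For example, num_to_mask(4) == 0xf, i.e. the low four bits set. */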

static inline int cpuid_vendor(char *vnd)
{
	union {
		struct {
			unsigned b,d,c;
		};
		char v[12];
	} v;
	unsigned eax;
#ifdef __PIC__
        __asm__ __volatile__(
            "pushl %%ebx\n"      /* must be preserved due to PIC code */
            "cpuid\n"
            "mov %%ebx, 0(%%edi)\n"
            "mov %%ecx, 4(%%edi)\n"
            "mov %%edx, 8(%%edi)\n"
            "popl %%ebx\n"
            : "=a" (eax)
            : "a"(0), "D"(v.v)
            : "%ecx", "%edx"
        );
#else
	asm("cpuid" : "=a" (eax), "=b" (v.b), "=c" (v.c), "=d" (v.d) : "0" (0));
#endif
	return !strncmp(v.v, vnd, 12);
}
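
/*
 * Illustrative use (hypothetical caller): CPUID leaf 0 returns the
 * 12-byte vendor string in EBX, EDX, ECX, which is why the union
 * above lays its fields out as b, d, c:
 *
 *	if (cpuid_vendor("GenuineIntel"))
 *		... apply Intel-specific setup ...
 */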

static inline unsigned arch_cpuid_1(int code)
{
    unsigned val;
#ifdef __PIC__
        __asm__ __volatile__ (
            "pushl %%ebx\n"
            "cpuid\n"
            "popl %%ebx\n"
            : "=a" (val)
            : "a" (code)
            : "ecx", "edx"
        );
#else
        asm("cpuid" : "=a" (v.eax) : "a" (code) : "ecx","ebx","edx");
#endif
        return val;
}
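
/*
 * Example (illustrative): arch_cpuid_1(0xa) returns CPUID.0AH:EAX,
 * whose low byte is the architectural perfmon version ID.
 */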


/* Work around Nehalem spec update AAJ79: CPUID incorrectly indicates that
   the unhalted reference cycles architectural event is supported. We assume
   steppings after C0 report correct data in CPUID. */
static inline void workaround_nehalem_aaj79(unsigned *ebx)
{
	union {
		unsigned eax;
		struct {
			unsigned stepping : 4;
			unsigned model : 4;
			unsigned family : 4;
			unsigned type : 2;
			unsigned res : 2;
			unsigned ext_model : 4;
			unsigned ext_family : 8;
			unsigned res2 : 4;
		};
	} v;
	unsigned model;

	if (!cpuid_vendor("GenuineIntel"))
		return;
	v.eax = arch_cpuid_1(1);
	model = (v.ext_model << 4) + v.model;
	if (v.family != 6 || model != 26 || v.stepping > 4)
		return;
	*ebx |= (1 << 2);	/* disable unsupported event */
}
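
/*
 * Worked example (illustrative): a Nehalem-EP C0 part reports
 * CPUID.1.EAX == 0x000106a4, which decodes as stepping 4, model 0xa,
 * family 6, ext_model 1; the displayed model is (1 << 4) + 0xa == 26,
 * so the workaround above applies.
 */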

static inline unsigned arch_get_filter(op_cpu cpu_type)
{
	if (cpu_type == CPU_ARCH_PERFMON) { 
		unsigned ebx, eax;
#ifdef __PIC__
                __asm__ __volatile__ (
                    "pushl %%ebx\n"
                    "cpuid\n"
                    "mov %%ebx, %%ecx\n"
                    "popl %%ebx"
                    : "=a" (eax), "=c" (ebx)
                    : "a" (0xa)
                    : "edx"
                );
#else
		asm("cpuid" : "=a" (eax), "=b" (ebx) : "0" (0xa) : "ecx","edx");
#endif
		workaround_nehalem_aaj79(&ebx);
		return ebx & num_to_mask(eax >> 24);
	}
	return -1U;
}
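
/*
 * In CPUID leaf 0xa, EAX bits 31:24 give the number of architectural
 * events enumerated by the EBX bit vector, and a set EBX bit means the
 * event is NOT available. E.g. (illustrative) eax >> 24 == 7 with
 * ebx == 0x4 would mean events 0-6 are enumerated but unhalted
 * reference cycles (bit 2) is unavailable.
 */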

static inline int arch_num_counters(op_cpu cpu_type) 
{
	if (cpu_type == CPU_ARCH_PERFMON) {
		unsigned v = arch_cpuid_1(0xa);
		return (v >> 8) & 0xff;
	} 
	return -1;
}

static inline unsigned arch_get_counter_mask(void)
{
	unsigned v = arch_cpuid_1(0xa);
	return num_to_mask((v >> 8) & 0xff);
}
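
/*
 * E.g. (illustrative) if CPUID.0AH:EAX[15:8] reports 4 general-purpose
 * counters, this returns num_to_mask(4) == 0xf.
 */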

#else

static inline unsigned arch_get_filter(op_cpu cpu_type)
{
	/* Do something with passed arg to shut up the compiler warning */
	if (cpu_type != CPU_NO_GOOD)
		return 0;
	return 0;
}

static inline int arch_num_counters(op_cpu cpu_type) 
{
	/* Do something with passed arg to shut up the compiler warning */
	if (cpu_type != CPU_NO_GOOD)
		return -1;
	return -1;
}

static inline unsigned arch_get_counter_mask(void)
{
	return 0;
}

#endif