// Extracted from a cgit web view.
// path: root/src/os.h
// blob: 0812f1aa0b4a9607c2e0ea01925ef2fbdaeafdb4
// Copyright 2006 Google Inc. All Rights Reserved.
// Author: nsanders, menderico

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

//      http://www.apache.org/licenses/LICENSE-2.0

// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef STRESSAPPTEST_OS_H_  // NOLINT
#define STRESSAPPTEST_OS_H_

#include <dirent.h>
#include <unistd.h>
#include <sys/syscall.h>

#include <string>
#include <list>
#include <map>
#include <vector>

// This file must work with autoconf on its public version,
// so these includes are correct.
#include "adler32memcpy.h"  // NOLINT
#include "sattypes.h"       // NOLINT
#include "clock.h"          // NOLINT

// Path of the pagemap file of the current process (under /proc/self).
// Explicitly file-local: every translation unit including this header gets
// its own private copy.
static const char kPagemapPath[] = "/proc/self/pagemap";

// Plain data record describing one PCI device. The struct carries no
// behavior of its own.
// NOTE(review): the per-field meanings below are inferred from the field
// names only -- confirm against the code that populates this struct.
struct PCIDevice {
  int32 domain;         // Presumably the PCI domain (segment) number.
  uint16 bus;           // Presumably the bus number.
  uint8 dev;            // Presumably the device (slot) number on the bus.
  uint8 func;           // Presumably the function number within the device.
  uint16 vendor_id;     // Vendor identifier.
  uint16 device_id;     // Device identifier.
  uint64 base_addr[6];  // Six base addresses; presumably one per BAR.
  uint64 size[6];       // Six sizes; presumably parallel to base_addr[].
};

// A list of pointers to PCIDevice records. The type itself says nothing
// about who owns (and must free) the pointed-to records.
typedef vector<PCIDevice*> PCIDevices;

// Forward declaration: OsLayer only stores a pointer to an ErrorDiag
// (OsLayer::error_diagnoser_), so the full definition is not needed here.
class ErrorDiag;

// Forward declaration of the clock wrapper installed via OsLayer::SetClock().
class Clock;

// This class implements OS/Platform specific functions.
//
// It is a polymorphic base: most operations are virtual so that platform
// specific subclasses can override them. The cache-flush and timestamp
// helpers are static inline and selected at compile time through the
// STRESSAPPTEST_CPU_* macros.
class OsLayer {
 public:
  OsLayer();
  virtual ~OsLayer();

  // Set the minimum amount of hugepages that should be available for testing.
  // Must be set before Initialize().
  void SetMinimumHugepagesSize(int64 min_bytes) {
    min_hugepages_bytes_ = min_bytes;
  }

  // Set the minimum amount of memory that should not be allocated. This only
  // has any effect if hugepages are not used.
  // Must be set before Initialize().
  void SetReserveSize(int64 reserve_mb) {
    reserve_mb_ = reserve_mb;
  }

  // Set parameters needed to translate physical address to memory module.
  // The 'channels' pointer is stored as-is (not copied).
  void SetDramMappingParams(uintptr_t channel_hash, int channel_width,
                            vector< vector<string> > *channels) {
    channel_hash_ = channel_hash;
    channel_width_ = channel_width;
    channels_ = channels;
  }

  // Initializes data structures and opens files.
  // Returns false on error.
  virtual bool Initialize();

  // Virtual to physical. This implementation is optional for
  // subclasses to implement.
  // Takes a pointer, and returns the corresponding bus address.
  virtual uint64 VirtualToPhysical(void *vaddr);

  // Prints failed dimm. This implementation is optional for
  // subclasses to implement.
  // Takes a bus address and string, and prints the DIMM name
  // into the string. Returns the DIMM number that corresponds to the
  // address given, or -1 if unable to identify the DIMM number.
  // Note that subclass implementations of FindDimm() MUST fill
  // buf with at LEAST one non-whitespace character (provided len > 0).
  virtual int FindDimm(uint64 addr, char *buf, int len);

  // Classifies addresses according to "regions"
  // This may mean different things on different platforms.
  virtual int32 FindRegion(uint64 paddr);
  // Find cpu cores associated with a region. Either NUMA or arbitrary.
  virtual cpu_set_t *FindCoreMask(int32 region);
  // Return cpu cores associated with a region in a hex string.
  virtual string FindCoreMaskFormat(int32 region);

  // Returns the HD device that contains this file.
  virtual string FindFileDevice(string filename);

  // Returns a list of paths corresponding to HD devices found on this machine.
  virtual list<string> FindFileDevices();

  // Polls for errors. This implementation is optional.
  // This will poll once for errors and return zero iff no errors were found.
  virtual int ErrorPoll();

  // Delay an appropriate amount of time between polling.
  virtual void ErrorWait();

  // Report errors. This implementation is mandatory.
  // This will output a machine readable line regarding the error.
  virtual bool ErrorReport(const char *part, const char *symptom, int count);

  // Flushes page cache. Used to circumvent the page cache when doing disk
  // I/O.  This will be a NOP until ActivateFlushPageCache() is called, which
  // is typically done when opening a file with O_DIRECT fails.
  // Returns false on error, true on success or NOP.
  // Subclasses may implement this in machine specific ways.
  virtual bool FlushPageCache(void);
  // Enable FlushPageCache() to actually do the flush instead of being a NOP.
  virtual void ActivateFlushPageCache(void);

  // Flushes cacheline. Used to distinguish read or write errors.
  // Subclasses may implement this in machine specific ways.
  // Takes a pointer, and flushes the cacheline containing that pointer.
  virtual void Flush(void *vaddr);

  // Fast flush, for use in performance critical code.
  // This is bound at compile time, and will not pick up
  // any runtime machine configuration info.
  //
  // The inline asm statements carry a "memory" clobber so that the compiler
  // itself does not move ordinary loads/stores across the flush sequence;
  // without it only the CPU-level ordering would be guaranteed.
  inline static void FastFlush(void *vaddr) {
#ifdef STRESSAPPTEST_CPU_PPC
    asm volatile("dcbf 0,%0; sync" : : "r" (vaddr) : "memory");
#elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
    // Put mfence before and after clflush to make sure:
    // 1. The write before the clflush is committed to memory bus;
    // 2. The read after the clflush is hitting the memory bus.
    //
    // From Intel manual:
    // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed
    // to be ordered by any other fencing, serializing or other CLFLUSH
    // instruction. For example, software can use an MFENCE instruction to
    // ensure that previous stores are included in the write-back.
    asm volatile("mfence" : : : "memory");
    asm volatile("clflush (%0)" : : "r" (vaddr) : "memory");
    asm volatile("mfence" : : : "memory");
#elif defined(STRESSAPPTEST_CPU_ARMV7A) && !defined(__aarch64__)
    // ARMv7a cachelines are 8 words (32 bytes).
    syscall(__ARM_NR_cacheflush, vaddr, reinterpret_cast<char*>(vaddr) + 32, 0);
#else
  #warning "Unsupported CPU type: Unable to force cache flushes."
#endif
  }

  // Fast flush, for use in performance critical code.
  // This is bound at compile time, and will not pick up
  // any runtime machine configuration info.  Takes a NULL-terminated
  // array of addresses to flush.
  //
  // As in FastFlush(), the "memory" clobbers act as compiler barriers around
  // the flush sequence.
  inline static void FastFlushList(void **vaddrs) {
#ifdef STRESSAPPTEST_CPU_PPC
    while (*vaddrs) {
      asm volatile("dcbf 0,%0" : : "r" (*vaddrs++) : "memory");
    }
    asm volatile("sync" : : : "memory");
#elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
    // Put mfence before and after clflush to make sure:
    // 1. The write before the clflush is committed to memory bus;
    // 2. The read after the clflush is hitting the memory bus.
    //
    // From Intel manual:
    // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed
    // to be ordered by any other fencing, serializing or other CLFLUSH
    // instruction. For example, software can use an MFENCE instruction to
    // ensure that previous stores are included in the write-back.
    asm volatile("mfence" : : : "memory");
    while (*vaddrs) {
      asm volatile("clflush (%0)" : : "r" (*vaddrs++) : "memory");
    }
    asm volatile("mfence" : : : "memory");
#elif defined(STRESSAPPTEST_CPU_ARMV7A)
    while (*vaddrs) {
      FastFlush(*vaddrs++);
    }
#else
    #warning "Unsupported CPU type: Unable to force cache flushes."
#endif
  }

  // Fast flush hint, for use in performance critical code.
  // This is bound at compile time, and will not pick up
  // any runtime machine configuration info.  Note that this
  // will not guarantee that a flush happens, but will at least
  // hint that it should.  This is useful for speeding up
  // parallel march algorithms.
  //
  // The "memory" clobber only stops the compiler from sinking an earlier
  // store to the line below the flush instruction; it adds no CPU fence.
  // Call FastFlushSync() to order the hinted flushes.
  inline static void FastFlushHint(void *vaddr) {
#ifdef STRESSAPPTEST_CPU_PPC
    asm volatile("dcbf 0,%0" : : "r" (vaddr) : "memory");
#elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
    // From Intel manual:
    // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed
    // to be ordered by any other fencing, serializing or other CLFLUSH
    // instruction. For example, software can use an MFENCE instruction to
    // ensure that previous stores are included in the write-back.
    asm volatile("clflush (%0)" : : "r" (vaddr) : "memory");
#elif defined(STRESSAPPTEST_CPU_ARMV7A)
    FastFlush(vaddr);
#else
    #warning "Unsupported CPU type: Unable to force cache flushes."
#endif
  }

  // Fast flush, for use in performance critical code.
  // This is bound at compile time, and will not pick up
  // any runtime machine configuration info.  Syncs any
  // transactions for ordering FastFlushHints.
  inline static void FastFlushSync() {
#ifdef STRESSAPPTEST_CPU_PPC
    asm volatile("sync" : : : "memory");
#elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
    // A single mfence orders the clflush instructions issued earlier by
    // FastFlushHint().
    //
    // From Intel manual:
    // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed
    // to be ordered by any other fencing, serializing or other CLFLUSH
    // instruction. For example, software can use an MFENCE instruction to
    // ensure that previous stores are included in the write-back.
    asm volatile("mfence" : : : "memory");
#elif defined(STRESSAPPTEST_CPU_ARMV7A)
    // This is a NOP, FastFlushHint() always does a full flush, so there's
    // nothing to do for FastFlushSync().
#else
  #warning "Unsupported CPU type: Unable to force cache flushes."
#endif
  }

  // Get time in cpu timer ticks. Useful for matching MCEs with software
  // actions.
  // Returns 0 on CPU types for which no timer read is implemented here.
  inline static uint64 GetTimestamp(void) {
    uint64 tsc;
#ifdef STRESSAPPTEST_CPU_PPC
    uint32 tbl, tbu, temp;
    // Read upper, lower, upper again; retry if the upper half changed in
    // between, so that the two halves form a consistent 64-bit value.
    __asm __volatile(
      "1:\n"
      "mftbu  %2\n"
      "mftb   %0\n"
      "mftbu  %1\n"
      "cmpw   %2,%1\n"
      "bne    1b\n"
      : "=r"(tbl), "=r"(tbu), "=r"(temp)
      :
      : "cc");

    tsc = (static_cast<uint64>(tbu) << 32) | static_cast<uint64>(tbl);
#elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
    // rdtsc returns the low 32 bits in eax and the high 32 bits in edx.
    datacast_t data;
    __asm __volatile("rdtsc" : "=a" (data.l32.l), "=d"(data.l32.h));
    tsc = data.l64;
#elif defined(STRESSAPPTEST_CPU_ARMV7A)
    #warning "Unsupported CPU type ARMV7A: your timer may not function correctly"
    tsc = 0;
#else
    #warning "Unsupported CPU type: your timer may not function correctly"
    tsc = 0;
#endif
    return (tsc);
  }

  // Find the free memory on the machine.
  virtual int64 FindFreeMemSize();

  // Allocates test memory of length bytes.
  // Subclasses must implement this.
  // Call PrepareTestMem to get a pointer.
  virtual int64 AllocateAllMem();  // Returns length.
  // Returns success.
  virtual bool AllocateTestMem(int64 length, uint64 paddr_base);
  virtual void FreeTestMem();

  // Prepares the memory for use. You must call this
  // before using test memory, and after you are done.
  virtual void *PrepareTestMem(uint64 offset, uint64 length);
  virtual void ReleaseTestMem(void *addr, uint64 offset, uint64 length);

  // Machine type detected. Can we implement all these functions correctly?
  // Returns true if machine type is detected and implemented.
  virtual bool IsSupported();

  // Returns 32 for 32-bit, 64 for 64-bit.
  virtual int AddressMode();
  // Update OsLayer state regarding cpu support for various features.
  virtual void GetFeatures();

  // Open, read, write pci cfg through /proc/bus/pci. fd is /proc/pci file.
  virtual int PciOpen(int bus, int device, int function);
  virtual void PciWrite(int fd, uint32 offset, uint32 value, int width);
  virtual uint32 PciRead(int fd, uint32 offset, int width);

  // Read MSRs
  virtual bool ReadMSR(uint32 core, uint32 address, uint64 *data);
  virtual bool WriteMSR(uint32 core, uint32 address, uint64 *data);

  // Extract bits [n+len-1, n] from a 32 bit word.
  // so GetBitField(0x0f00, 8, 4) == 0xf.
  virtual uint32 GetBitField(uint32 val, uint32 n, uint32 len);

  // Platform and CPU specific CPU-stressing function.
  // Returns true on success, false otherwise.
  virtual bool CpuStressWorkload();

  // Causes false errors for unittesting.
  // Setting to "true" causes errors to be injected.
  void set_error_injection(bool errors) { error_injection_ = errors; }
  bool error_injection() const { return error_injection_; }

  // Is SAT using normal malloc'd memory, or exotic mmap'd memory.
  bool normal_mem() const { return normal_mem_; }

  // Get numa config, if available.
  int num_nodes() const { return num_nodes_; }
  int num_cpus() const { return num_cpus_; }

  // Handle to platform-specific error diagnoser.
  ErrorDiag *error_diagnoser_;

  // Disambiguate between different "warm" memcopies.
  virtual bool AdlerMemcpyWarm(uint64 *dstmem, uint64 *srcmem,
                               unsigned int size_in_bytes,
                               AdlerChecksum *checksum);

  // Store a callback to use to print
  // app-specific info about the last error location.
  // This call back is called with a physical address, and the app can fill in
  // the most recent transaction that occurred at that address.
  typedef bool (*ErrCallback)(uint64 paddr, string *buf);
  void set_err_log_callback(
    ErrCallback err_log_callback) {
    err_log_callback_ = err_log_callback;
  }
  // Returns the callback stored by set_err_log_callback().
  ErrCallback get_err_log_callback() const { return err_log_callback_; }

  // Set a clock object that can be overridden for use with unit tests.
  //
  // The previously installed clock (if any) is deleted here, so the object
  // passed in must be heap-allocated and must not be deleted by the caller.
  // time_initialized_ is reset to the new clock's current time.
  //
  // A null 'clock' is ignored and the current clock is kept. Passing the
  // already-installed clock only refreshes time_initialized_; it is not
  // deleted (deleting it and then calling Now() on it would be a
  // use-after-free).
  void SetClock(Clock *clock) {
    if (!clock) {
      return;
    }
    if (clock != clock_) {
      delete clock_;  // Deleting a null pointer is a no-op.
      clock_ = clock;
    }
    time_initialized_ = clock_->Now();
  }

 protected:
  void *testmem_;                // Location of test memory.
  uint64 testmemsize_;           // Size of test memory.
  int64 totalmemsize_;           // Size of available memory.
  int64 min_hugepages_bytes_;    // Minimum hugepages size.
  int64 reserve_mb_;             // Minimum amount of memory to reserve in MB.
  bool  error_injection_;        // Do error injection?
  bool  normal_mem_;             // Memory DMA capable?
  bool  use_hugepages_;          // Use hugepage shmem?
  bool  use_posix_shm_;          // Use 4k page shmem?
  bool  dynamic_mapped_shmem_;   // Conserve virtual address space.
  bool  mmapped_allocation_;     // Was memory allocated using mmap()?
  int   shmid_;                  // Handle to shmem
  vector< vector<string> > *channels_;  // Memory module names per channel.
  uint64 channel_hash_;          // Mask of address bits XORed for channel.
  int channel_width_;            // Channel width in bits.

  int64 regionsize_;             // Size of memory "regions"
  int   regioncount_;            // Number of memory "regions"
  int   num_cpus_;               // Number of cpus in the system.
  int   num_nodes_;              // Number of nodes in the system.
  int   num_cpus_per_node_;      // Number of cpus per node in the system.
  int   address_mode_;           // Are we running 32 or 64 bit?
  bool  has_vector_;             // Do we have sse2/neon instructions?
  bool  has_clflush_;            // Do we have clflush instructions?
  bool  use_flush_page_cache_;   // Do we need to flush the page cache?


  time_t time_initialized_;      // Start time of test.

  vector<cpu_set_t> cpu_sets_;   // Cache for cpu masks.
  vector<bool> cpu_sets_valid_;  // If the cpu mask cache is valid.

  // Get file descriptor for dev msr.
  virtual int OpenMSR(uint32 core, uint32 address);

  // Look up how many hugepages there are.
  virtual int64 FindHugePages();

  // Link to find last transaction at an error location.
  ErrCallback err_log_callback_;

  // Object to wrap the time function.
  Clock *clock_;

 private:
  DISALLOW_COPY_AND_ASSIGN(OsLayer);
};

// Selects and returns the proper OS and hardware interface.  Does not call
// OsLayer::Initialize() on the new object.
// 'options' is a string-to-string map of settings; which keys are recognized
// is decided by the implementation, which is not part of this header.
// NOTE(review): "the new object" suggests the caller owns the returned
// pointer -- confirm against the implementation.
OsLayer *OsLayerFactory(const std::map<std::string, std::string> &options);

#endif  // STRESSAPPTEST_OS_H_ NOLINT