aboutsummaryrefslogtreecommitdiff
path: root/netcpu_looper.c
blob: b76c7c96a03d02cce6511e29d96f7d5c3ab03ef3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
char   netcpu_looper_id[]="\
@(#)netcpu_looper.c (c) Copyright 2005-2007. Version 2.4.3";

/* netcpu_looper.c
  
   Implement the soaker process specific portions of netperf CPU
   utilization measurements. These are broken-out into a separate file
   to make life much nicer over in netlib.c which had become a maze of
   twisty, CPU-util-related, #ifdefs, all different.  raj 2005-01-26
   */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <stdio.h>

#ifdef HAVE_FCNTL_H
# include <fcntl.h>
#endif
#if HAVE_UNISTD_H
# include <unistd.h>
#endif
#if defined(HAVE_MMAP) || defined(HAVE_SYS_MMAN_H)
# include <sys/mman.h>
#else
# error netcpu_looper requires mmap
#endif

#if TIME_WITH_SYS_TIME
# include <sys/time.h>
# include <time.h>
#else
# if HAVE_SYS_TIME_H
#  include <sys/time.h>
# else
#  include <time.h>
# endif
#endif

#if HAVE_SYS_TYPES_H
# include <sys/types.h>
#endif

#if HAVE_SYS_WAIT_H
# include <sys/wait.h>
#endif

#ifdef HAVE_SIGNAL_H
#include <signal.h>
#endif

#ifdef HAVE_ERRNO_H
#include <errno.h>
#endif

#include "netsh.h"
#include "netlib.h"

#define PAGES_PER_CHILD 2

/* the lib_start_count and lib_end_count arrays hold the starting
   and ending values of whatever is counting when the system is
   idle. The rate at which this increments during a test is compared
   with a previous calibrarion to arrive at a CPU utilization
   percentage. raj 2005-01-26 */
static uint64_t  lib_start_count[MAXCPUS];
static uint64_t  lib_end_count[MAXCPUS];

static int *cpu_mappings;

static int lib_idle_fd;
static uint64_t *lib_idle_address[MAXCPUS];
static long     *lib_base_pointer;
static pid_t     lib_idle_pids[MAXCPUS];
static int       lib_loopers_running=0;

/* we used to use this code to bind the loopers, but since we have
   decided to enable processor affinity for the actual
   netperf/netserver processes we will use that affinity routine,
   which happens to know about more systems than this */

#ifdef NOTDEF
static void
bind_to_processor(int child_num)
{
  /* This routine will bind the calling process to a particular */
  /* processor. We are not choosy as to which processor, so it will be */
  /* the process id mod the number of processors - shifted by one for */
  /* those systems which name processor starting from one instead of */
  /* zero. on those systems where I do not yet know how to bind a */
  /* process to a processor, this routine will be a no-op raj 10/95 */

  /* just as a reminder, this is *only* for the looper processes, not */
  /* the actual measurement processes. those will, should, MUST float */
  /* or not float from CPU to CPU as controlled by the operating */
  /* system defaults. raj 12/95 */

#ifdef __hpux
#include <sys/syscall.h>
#include <sys/mp.h>

  int old_cpu = -2;

  if (debug) {
    fprintf(where,
            "child %d asking for CPU %d as pid %d with %d CPUs\n",
            child_num,
            (child_num % lib_num_loc_cpus),
            getpid(),
            lib_num_loc_cpus);
    fflush(where);
  }

  SETPROCESS((child_num % lib_num_loc_cpus), getpid());
  return;

#else
#if defined(__sun) && defined(__SVR4)
 /* should only be Solaris */
#include <sys/processor.h>
#include <sys/procset.h>

  int old_binding;

  if (debug) {
    fprintf(where,
            "bind_to_processor: child %d asking for CPU %d as pid %d with %d CPUs\n",
            child_num,
            (child_num % lib_num_loc_cpus),
            getpid(),
            lib_num_loc_cpus);
    fflush(where);
  }

  if (processor_bind(P_PID,
                     getpid(),
                     (child_num % lib_num_loc_cpus), 
                      &old_binding) != 0) {
    fprintf(where,"bind_to_processor: unable to perform processor binding\n");
    fprintf(where,"                   errno %d\n",errno);
    fflush(where);
  }
  return;
#else
#ifdef WIN32

  if (!SetThreadAffinityMask(GetCurrentThread(), (ULONG_PTR)1 << (child_num % lib_num_loc_cpus))) {
    perror("SetThreadAffinityMask failed");
    fflush(stderr);
  }

  if (debug) {
    fprintf(where,
            "bind_to_processor: child %d asking for CPU %d of %d CPUs\n",
            child_num,
            (child_num % lib_num_loc_cpus),
            lib_num_loc_cpus);
    fflush(where);
  }

#endif
  return;
#endif /* __sun && _SVR4 */
#endif /* __hpux */
}
#endif

 /* sit_and_spin will just spin about incrementing a value */
 /* this value will either be in a memory mapped region on Unix shared */
 /* by each looper process, or something appropriate on Windows/NT */
 /* (malloc'd or such). This routine is reasonably ugly in that it has */
 /* priority manipulating code for lots of different operating */
 /* systems. This routine never returns. raj 1/96 */ 

static void
sit_and_spin(int child_index)

{
  uint64_t *my_counter_ptr;

 /* only use C stuff if we are not WIN32 unless and until we */
 /* switch from CreateThread to _beginthread. raj 1/96 */
#ifndef WIN32
  /* we are the child. we could decide to exec some separate */
  /* program, but that doesn't really seem worthwhile - raj 4/95 */
  if (debug > 1) {
    fprintf(where,
            "Looper child %d is born, pid %d\n",
            child_index,
            getpid());
    fflush(where);
  }
  
#endif /* WIN32 */

  /* reset our base pointer to be at the appropriate offset */
  my_counter_ptr = (uint64_t *) ((char *)lib_base_pointer + 
                             (netlib_get_page_size() * 
                              PAGES_PER_CHILD * child_index));
  
  /* in the event we are running on an MP system, it would */
  /* probably be good to bind the soaker processes to specific */
  /* processors. I *think* this is the most reasonable thing to */
  /* do, and would be closes to simulating the information we get */
  /* on HP-UX with pstat. I could put all the system-specific code */
  /* here, but will "abstract it into another routine to keep this */
  /* area more readable. I'll probably do the same thine with the */
  /* "low pri code" raj 10/95 */
  
  /* since we are "flying blind" wrt where we should bind the looper
     processes, we want to use the cpu_map that was prepared by netlib
     rather than assume that the CPU ids on the system start at zero
     and are contiguous. raj 2006-04-03 */
  bind_to_specific_processor(child_index % lib_num_loc_cpus,1);
  
  for (*my_counter_ptr = 0L;
       ;
       (*my_counter_ptr)++) {
    if (!(*lib_base_pointer % 1)) {
      /* every once and again, make sure that our process priority is */
      /* nice and low. also, by making system calls, it may be easier */
      /* for us to be pre-empted by something that needs to do useful */
      /* work - like the thread of execution actually sending and */
      /* receiving data across the network :) */
#ifdef _AIX
      int pid,prio;

      prio = PRIORITY;
      pid = getpid();
      /* if you are not root, this call will return EPERM - why one */
      /* cannot change one's own priority to  lower value is beyond */
      /* me. raj 2/26/96 */  
      setpri(pid, prio);
#else /* _AIX */
#ifdef __sgi
      int pid,prio;

      prio = PRIORITY;
      pid = getpid();
      schedctl(NDPRI, pid, prio);
      sginap(0);
#else /* __sgi */
#ifdef WIN32
      SetThreadPriority(GetCurrentThread(),THREAD_PRIORITY_IDLE);
#else /* WIN32 */
#if defined(__sun) && defined(__SVR4)
#include <sys/types.h>
#include <sys/priocntl.h>
#include <sys/rtpriocntl.h>
#include <sys/tspriocntl.h>
      /* I would *really* like to know how to use priocntl to make the */
      /* priority low for this looper process. however, either my mind */
      /* is addled, or the manpage in section two for priocntl is not */
      /* terribly helpful - for one, it has no examples :( so, if you */
      /* can help, I'd love to hear from you. in the meantime, we will */
      /* rely on nice(39). raj 2/26/96 */
      nice(39);
#else /* __sun && __SVR4 */
      nice(39);
#endif /* __sun && _SVR4 */
#endif /* WIN32 */
#endif /* __sgi */
#endif /* _AIX */
    }
  }
}



 /* this routine will start all the looper processes or threads for */
 /* measuring CPU utilization. */

static void
start_looper_processes()
{

  unsigned int      i, file_size;
  
  /* we want at least two pages for each processor. the */
  /* child for any one processor will write to the first of his two */
  /* pages, and the second page will be a buffer in case there is page */
  /* prefetching. if your system pre-fetches more than a single page, */
  /* well, you'll have to modify this or live with it :( raj 4/95 */

  file_size = ((netlib_get_page_size() * PAGES_PER_CHILD) * 
               lib_num_loc_cpus);
  
#ifndef WIN32

  /* we we are not using WINDOWS NT (or 95 actually :), then we want */
  /* to create a memory mapped region so we can see all the counting */
  /* rates of the loopers */

  /* could we just use an anonymous memory region for this? it is */
  /* possible that using a mmap()'ed "real" file, while convenient for */
  /* debugging, could result in some filesystem activity - like */
  /* metadata updates? raj 4/96 */
  lib_idle_fd = open("/tmp/netperf_cpu",O_RDWR | O_CREAT | O_EXCL);
  
  if (lib_idle_fd == -1) {
    fprintf(where,"create_looper: file creation; errno %d\n",errno);
    fflush(where);
    exit(1);
  }
  
  if (chmod("/tmp/netperf_cpu",0644) == -1) {
    fprintf(where,"create_looper: chmod; errno %d\n",errno);
    fflush(where);
    exit(1);
  }
  
  /* with the file descriptor in place, lets be sure that the file is */
  /* large enough. */
  
  if (truncate("/tmp/netperf_cpu",file_size) == -1) {
    fprintf(where,"create_looper: truncate: errno %d\n",errno);
    fflush(where);
    exit(1);
  }
  
  /* the file should be large enough now, so we can mmap it */
  
  /* if the system does not have MAP_VARIABLE, just define it to */
  /* be zero. it is only used/needed on HP-UX (?) raj 4/95 */
#ifndef MAP_VARIABLE
#define MAP_VARIABLE 0x0000
#endif /* MAP_VARIABLE */
#ifndef MAP_FILE
#define MAP_FILE 0x0000
#endif /* MAP_FILE */
  if ((lib_base_pointer = (long *)mmap(NULL,
                                       file_size,
                                       PROT_READ | PROT_WRITE,
                                       MAP_FILE | MAP_SHARED | MAP_VARIABLE,
                                       lib_idle_fd,
                                       0)) == (long *)-1) {
    fprintf(where,"create_looper: mmap: errno %d\n",errno);
    fflush(where);
    exit(1);
  }
  

  if (debug > 1) {
    fprintf(where,"num CPUs %d, file_size %d, lib_base_pointer %p\n",
            lib_num_loc_cpus,
            file_size,
            lib_base_pointer);
    fflush(where);
  }

  /* we should have a valid base pointer. lets fork */
  
  for (i = 0; i < (unsigned int)lib_num_loc_cpus; i++) {
    switch (lib_idle_pids[i] = fork()) {
    case -1:
      perror("netperf: fork");
      exit(1);
    case 0:
      /* we are the child. we could decide to exec some separate */
      /* program, but that doesn't really seem worthwhile - raj 4/95 */

      signal(SIGTERM, SIG_DFL);
      sit_and_spin(i);

      /* we should never really get here, but if we do, just exit(0) */
      exit(0);
      break;
    default:
      /* we must be the parent */
      lib_idle_address[i] = (uint64_t *) ((char *)lib_base_pointer + 
                                      (netlib_get_page_size() * 
                                       PAGES_PER_CHILD * i));
      if (debug) {
        fprintf(where,"lib_idle_address[%d] is %p\n",
                i,
                lib_idle_address[i]);
        fflush(where);
      }
    }
  }
#else
  /* we are compiled -DWIN32 */
  if ((lib_base_pointer = malloc(file_size)) == NULL) {
    fprintf(where,
            "create_looper_process could not malloc %d bytes\n",
            file_size);
    fflush(where);
    exit(1);
  }

  /* now, create all the threads */
  for(i = 0; i < (unsigned int)lib_num_loc_cpus; i++) {
    long place_holder;
    if ((lib_idle_pids[i] = CreateThread(0,
                                         0,
                                         (LPTHREAD_START_ROUTINE)sit_and_spin,
                                         (LPVOID)(ULONG_PTR)i,
                                         0,
                                         &place_holder)) == NULL ) {
      fprintf(where,
              "create_looper_process: CreateThread failed\n");
      fflush(where);
      /* I wonder if I need to look for other threads to kill? */
      exit(1);
    }
    lib_idle_address[i] = (long *) ((char *)lib_base_pointer + 
                                    (netlib_get_page_size() * 
                                     PAGES_PER_CHILD * i));
    if (debug) {
      fprintf(where,"lib_idle_address[%d] is %p\n",
              i,
              lib_idle_address[i]);
      fflush(where);
    }
  }
#endif /* WIN32 */

  /* we need to have the looper processes settled-in before we do */
  /* anything with them, so lets sleep for say 30 seconds. raj 4/95 */

  sleep(30);
}

void
cpu_util_init(void) 
{
  cpu_method = LOOPER;

  /* we want to get the looper processes going */
  if (!lib_loopers_running) {
    start_looper_processes();
    lib_loopers_running = 1;
  }

  return;
}

/* clean-up any left-over CPU util resources - looper processes,
   files, whatever.  raj 2005-01-26 */
void
cpu_util_terminate() {

#ifdef WIN32
  /* it would seem that if/when the process exits, all the threads */
  /* will go away too, so I don't think I need any explicit thread */
  /* killing calls here. raj 1/96 */
#else

  int i;

  /* now go through and kill-off all the child processes */
  for (i = 0; i < lib_num_loc_cpus; i++){
    /* SIGKILL can leave core files behind - thanks to Steinar Haug */
    /* for pointing that out. */
    kill(lib_idle_pids[i],SIGTERM);
  }
  lib_loopers_running = 0;
  /* reap the children */
  while(waitpid(-1, NULL, WNOHANG) > 0) { }
  
  /* finally, unlink the mmaped file */
  munmap((caddr_t)lib_base_pointer,
         ((netlib_get_page_size() * PAGES_PER_CHILD) * 
          lib_num_loc_cpus));
  unlink("/tmp/netperf_cpu");
#endif
  return;
}

int
get_cpu_method(void)
{
  return LOOPER;
}

 /* calibrate_looper */

 /* Loop a number of iterations, sleeping interval seconds each and */
 /* count how high the idle counter gets each time. Return  the */
 /* measured cpu rate to the calling routine. raj 4/95 */

float
calibrate_idle_rate (int iterations, int interval)
{

  uint64_t
    firstcnt[MAXCPUS],
    secondcnt[MAXCPUS];

  float 
    elapsed,
    temp_rate,
    rate[MAXTIMES],
    local_maxrate;

  long  
    sec,
    usec;

  int   
    i,
    j;
  
  struct  timeval time1, time2 ;
  struct  timezone tz;
  
  if (iterations > MAXTIMES) {
    iterations = MAXTIMES;
  }

  local_maxrate = (float)-1.0;
  
  for(i = 0; i < iterations; i++) {
    rate[i] = (float)0.0;
    for (j = 0; j < lib_num_loc_cpus; j++) {
      firstcnt[j] = *(lib_idle_address[j]);
    }
    gettimeofday (&time1, &tz);
    sleep(interval);
    gettimeofday (&time2, &tz);

    if (time2.tv_usec < time1.tv_usec)
      {
        time2.tv_usec += 1000000;
        time2.tv_sec -=1;
      }
    sec = time2.tv_sec - time1.tv_sec;
    usec = time2.tv_usec - time1.tv_usec;
    elapsed = (float)sec + ((float)usec/(float)1000000.0);
    
    if(debug) {
      fprintf(where, "Calibration for counter run: %d\n",i);
      fprintf(where,"\tsec = %ld usec = %ld\n",sec,usec);
      fprintf(where,"\telapsed time = %g\n",elapsed);
    }

    for (j = 0; j < lib_num_loc_cpus; j++) {
      secondcnt[j] = *(lib_idle_address[j]);
      if(debug) {
        /* I know that there are situations where compilers know about */
        /* long long, but the library fucntions do not... raj 4/95 */
        fprintf(where,
                "\tfirstcnt[%d] = 0x%8.8lx%8.8lx secondcnt[%d] = 0x%8.8lx%8.8lx\n",
                j,
                (uint32_t)(firstcnt[j]>>32),
                (uint32_t)(firstcnt[j]&0xffffffff),
                j,
                (uint32_t)(secondcnt[j]>>32),
                (uint32_t)(secondcnt[j]&0xffffffff));
      }
      /* we assume that it would wrap no more than once. we also */
      /* assume that the result of subtracting will "fit" raj 4/95 */
      temp_rate = (secondcnt[j] >= firstcnt[j]) ?
        (float)(secondcnt[j] - firstcnt[j])/elapsed : 
          (float)(secondcnt[j]-firstcnt[j]+MAXLONG)/elapsed;
      if (temp_rate > rate[i]) rate[i] = temp_rate;
      if(debug) {
        fprintf(where,"\trate[%d] = %g\n",i,rate[i]);
        fflush(where);
      }
      if (local_maxrate < rate[i]) local_maxrate = rate[i];
    }
  }
  if(debug) {
    fprintf(where,"\tlocal maxrate = %g per sec. \n",local_maxrate);
    fflush(where);
  }
  return local_maxrate;
}


void
get_cpu_idle (uint64_t *res)
{
  int i;

  for (i = 0; i < lib_num_loc_cpus; i++){
    res[i] = *lib_idle_address[i];
  }

}

float
calc_cpu_util_internal(float elapsed_time)
{
  int i;
  float correction_factor;
  float actual_rate;

  lib_local_cpu_util = (float)0.0;
  /* It is possible that the library measured a time other than */
  /* the one that the user want for the cpu utilization */
  /* calculations - for example, tests that were ended by */
  /* watchdog timers such as the udp stream test. We let these */
  /* tests tell up what the elapsed time should be. */
  
  if (elapsed_time != 0.0) {
    correction_factor = (float) 1.0 + 
      ((lib_elapsed - elapsed_time) / elapsed_time);
  }
  else {
    correction_factor = (float) 1.0;
  }

  for (i = 0; i < lib_num_loc_cpus; i++) {

    /* it would appear that on some systems, in loopback, nice is
     *very* effective, causing the looper process to stop dead in its
     tracks. if this happens, we need to ensure that the calculation
     does not go south. raj 6/95 and if we run completely out of idle,
     the same thing could in theory happen to the USE_KSTAT path. raj
     8/2000 */ 
    
    if (lib_end_count[i] == lib_start_count[i]) {
      lib_end_count[i]++;
    }
    
    actual_rate = (lib_end_count[i] > lib_start_count[i]) ?
      (float)(lib_end_count[i] - lib_start_count[i])/lib_elapsed :
      (float)(lib_end_count[i] - lib_start_count[i] +
	      MAXLONG)/ lib_elapsed;
    if (debug) {
      fprintf(where,
              "calc_cpu_util: actual_rate on processor %d is %f start 0x%8.8lx%8.8lx end 0x%8.8lx%8.8lx\n",
              i,
              actual_rate,
              (uint32_t)(lib_start_count[i]>>32),
              (uint32_t)(lib_start_count[i]&0xffffffff),
              (uint32_t)(lib_end_count[i]>>32),
              (uint32_t)(lib_end_count[i]&0xffffffff));
    }
    lib_local_per_cpu_util[i] = (lib_local_maxrate - actual_rate) /
      lib_local_maxrate * 100;
    lib_local_cpu_util += lib_local_per_cpu_util[i];
  }
  /* we want the average across all n processors */
  lib_local_cpu_util /= (float)lib_num_loc_cpus;
  
  lib_local_cpu_util *= correction_factor;
  return lib_local_cpu_util;


}
void
cpu_start_internal(void)
{
  get_cpu_idle(lib_start_count);
  return;
}

void
cpu_stop_internal(void)
{
  get_cpu_idle(lib_end_count);
}