summaryrefslogtreecommitdiff
path: root/tests/memtest/fptest.cpp
blob: 564b700a08a651c67c1662099ca45f8e21cd412a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
/*
 * Copyright (C) 2007 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <time.h>
#include <unistd.h>
#include <sched.h>
#include <sys/resource.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/mman.h>

#ifdef __ARM_NEON__
#include <arm_neon.h>
#endif


// Nanosecond count, as produced by system_time().
typedef long long nsecs_t;
// Timestamp stored by startTime() and read back by endTime().
static nsecs_t gTime;
// Working buffer shared by the benchmark kernels in this file; each test
// seeds a prefix of it and then reads and overwrites it in place.
float data_f[1024 * 128];

/*
 * Reads CLOCK_MONOTONIC and returns the reading expressed in nanoseconds.
 *
 * The timespec is cleared before the call, so the result is computed from
 * zeroed fields if clock_gettime() leaves them untouched; the call's return
 * value is not inspected.
 */
static nsecs_t system_time()
{
    struct timespec now;
    now.tv_nsec = 0;
    now.tv_sec = 0;
    clock_gettime(CLOCK_MONOTONIC, &now);

    // Widen the seconds before scaling so the multiply is done in long long.
    const nsecs_t whole_seconds_ns = (nsecs_t)now.tv_sec * 1000000000LL;
    return whole_seconds_ns + now.tv_nsec;
}

static void startTime()
{
    gTime = system_time();
}

static void endTime(const char *str, double ops)
{
    nsecs_t t = system_time() - gTime;
    double ds = ((double)t) / 1e9;
    printf("Test: %s, %f Mops\n", str, ops / ds / 1e6);
}


/*
 * Scalar multiply-add benchmark.
 *
 * Seeds the first 1020 entries of data_f with their own index, then
 * repeatedly rewrites the first 1000 entries in place, each as a 10-term
 * weighted sum of itself and its next nine neighbours plus 1.0. Every output
 * costs 10 multiplies and 10 adds, so 50000 passes * 1000 outputs * 20 ops
 * gives the 1e9 operations reported through endTime().
 */
static void test_mad() {
    const int kOutputs = 1000;                          // entries rewritten per pass
    const int kOpsPerOutput = 20;                       // 10 multiplies + 10 adds
    const int kPasses = (1000 * 1000) / kOpsPerOutput;  // 50000

    // The kernel reads up to data_f[i+9] (index 1008 at most), so seeding
    // 1020 entries covers every element that is read.
    for (int i = 0; i < 1020; i++) {
        data_f[i] = (float)i;
    }

    startTime();

    // Do ~1 billion ops
    for (int pass = 0; pass < kPasses; pass++) {
        for (int i = 0; i < kOutputs; i++) {
            data_f[i] = (data_f[i] * 0.02f +
                         data_f[i+1] * 0.04f +
                         data_f[i+2] * 0.05f +
                         data_f[i+3] * 0.1f +
                         data_f[i+4] * 0.2f +
                         data_f[i+5] * 0.2f +
                         data_f[i+6] * 0.1f +
                         data_f[i+7] * 0.05f +
                         data_f[i+8] * 0.04f +
                         data_f[i+9] * 0.02f + 1.f);
        }
    }

    // Report exactly the amount of work the loops above performed.
    endTime("scalar mad", (double)kPasses * kOutputs * kOpsPerOutput);
}


#ifdef __ARM_NEON__

static void test_fma() {
    for(int i=0; i<1020 * 4; i++) {
        data_f[i] = i;
    }
    float32x4_t c0_02 = vdupq_n_f32(0.02f);
    float32x4_t c0_04 = vdupq_n_f32(0.04f);
    float32x4_t c0_05 = vdupq_n_f32(0.05f);
    float32x4_t c0_10 = vdupq_n_f32(0.1f);
    float32x4_t c0_20 = vdupq_n_f32(0.2f);
    float32x4_t c1_00 = vdupq_n_f32(1.0f);

    startTime();

    float total = 0;
    // Do ~1 billion ops
    for (int ct=0; ct < (1000 * (1000 / 80)); ct++) {
        for (int i=0; i < 1000; i++) {
            float32x4_t t;
            t = vmulq_f32(vld1q_f32((float32_t *)&data_f[i]), c0_02);
            t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+4]), c0_04);
            t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+8]), c0_05);
            t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+12]), c0_10);
            t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+16]), c0_20);
            t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+20]), c0_20);
            t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+24]), c0_10);
            t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+28]), c0_05);
            t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+32]), c0_04);
            t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+36]), c0_02);
            t = vaddq_f32(t, c1_00);
            vst1q_f32((float32_t *)&data_f[i], t);
        }
    }

    endTime("neon fma", 1e9);
}
#endif

/*
 * Entry point for the floating-point benchmarks.
 *
 * Runs the scalar test, followed by the NEON test when the compiler defines
 * __ARM_NEON__. Neither argument is used. Always returns 0.
 */
int fp_test(int argc, char** argv) {
    (void)argc;
    (void)argv;

    test_mad();
#if defined(__ARM_NEON__)
    test_fma();
#endif
    return 0;
}