nn/runtime/ExecutionPlan.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356

/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Classes used to plan how to execute a model across multiple devices.

#ifndef ANDROID_FRAMEWORKS_ML_NN_RUNTIME_EXECUTION_PLAN_H
#define ANDROID_FRAMEWORKS_ML_NN_RUNTIME_EXECUTION_PLAN_H

#include <android-base/logging.h>
#include <openssl/sha.h>

#include <map>
#include <memory>
#include <ostream>
#include <set>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "HalInterfaces.h"
#include "Memory.h"
#include "ModelBuilder.h"
#include "NeuralNetworks.h"
#include "TokenHasher.h"
#include "Utils.h"

namespace android {
namespace nn {

class BurstBuilder;
class CompilationBuilder;
class Device;
class ExecutionBuilder;
class ExecutionPlan;
class ExecutionBurstController;
class Memory;
class PreparedModel;
class StepExecutor;

class ExecutionStep {
   public:
    typedef std::vector<std::pair<uint32_t, uint32_t>> RemapVectorType;
    typedef std::set<std::pair<uint32_t, uint32_t>> SubModelOutputSetType;

    enum OperandKind { INPUT, OUTPUT };

    ExecutionStep(ExecutionPlan* plan, uint32_t stepIndex, std::shared_ptr<Device> device);
    int addOperation(int operationIndex, const ModelBuilder& fromModel);
    int addOperand(uint32_t fromOperandIndex, uint32_t* toOperandIndex,
                   const ModelBuilder& fromModel, OperandKind kind);

    // Each container entry is of the form (fromModel index, subModel index)
    const RemapVectorType& getModelInputs() const { return mModelInputs; }
    const RemapVectorType& getModelOutputs() const { return mModelOutputs; }
    const RemapVectorType& getTempsAsSubModelInputs() const { return mTempsAsSubModelInputs; }
    const SubModelOutputSetType& getTempsAsSubModelOutputs() const {
        return mTempsAsSubModelOutputs;
    }
    const RemapVectorType& getOutputsAsSubModelInputs() const { return mOutputsAsSubModelInputs; }
    const std::vector<uint32_t>& getOutputIndexSubModelToFromModel() const {
        return mOutputIndexSubModelToFromModel;
    }
    const std::vector<uint32_t>& getOutputsAsSubModelInputsIndexToFromModel() const {
        return mOutputsAsSubModelInputsIndexToFromModel;
    }

    void recordTempAsSubModelOutput(uint32_t fromModelIndex) {
        const auto it = mOperandMap.find(fromModelIndex);
        nnAssert(it != mOperandMap.end());
        mTempsAsSubModelOutputs.insert(std::make_pair(fromModelIndex, it->second));
    }

    // If this step has a submodel output of unknown size, sets
    // *hasOutputOfUnknownSize to true; otherwise, leaves it
    // unchanged.
    int finishSubModel(const ModelBuilder* fromModel, bool* hasOutputOfUnknownSize,
                       int32_t executionPreference);

    const ModelBuilder* getSubModel() const { return &mSubModel; }
    std::shared_ptr<Device> getDevice() const { return mDevice; }

    // only available after calling finishSubModel()
    std::shared_ptr<PreparedModel> getPreparedSubModel() const { return mPreparedSubModel; }

    // Map inputs and outputs from ExecutionBuilder to StepExecutor.
    void mapInputsAndOutputs(std::shared_ptr<StepExecutor> stepExecutor) const;

    void dump() const;

    // For test only, get the transformed cache token.
    const uint8_t* forTest_getCacheToken() const { return mToken.getCacheToken(); }

   private:
    void logSubModel() const;

    // TODO: Some of the data is working state information that
    // shouldn't be needed after we've constructed but not executed
    // the step.

    ExecutionPlan* mPlan;
    uint32_t mIndex;  // index of step within plan
    ModelBuilder mSubModel;
    std::shared_ptr<Device> mDevice;
    std::shared_ptr<PreparedModel> mPreparedSubModel;

    // Inputs of original model that are also inputs of this submodel:
    //     (fromModel index, subModel index)
    RemapVectorType mModelInputs;
    // Outputs of original model that are also outputs of this submodel:
    //     (fromModel index, subModel index)
    RemapVectorType mModelOutputs;
    // Temporaries of original model that are inputs of this submodel:
    //     (fromModel index, subModel index)
    RemapVectorType mTempsAsSubModelInputs;
    // Temporaries of original model that are outputs of this submodel:
    //     (fromModel index, subModel index)
    SubModelOutputSetType mTempsAsSubModelOutputs;
    // Outputs of original model that are inputs of this submodel:
    //     (fromModel index, subModel index)
    RemapVectorType mOutputsAsSubModelInputs;
    // Converts operand indexes from the main model to the submodel.
    std::unordered_map<uint32_t, uint32_t> mOperandMap;
    // Converts input indexes from the submodel to the main model
    // (these are input indexes, not operand indexes).  This vector
    // only describes inputs of the submodel that are also inputs of
    // the main model -- that is, mModelInputs but not mTempsAsSubModelInputs.
    std::vector<uint32_t> mInputIndexSubModelToFromModel;
    // Converts output indexes from the submodel to the main model
    // (these are output indexes, not operand indexes).  This vector
    // only describes outputs of the submodel that are also outputs of
    // the main model -- that is, mModelOutputs but not mTempsAsSubModelOutputs.
    std::vector<uint32_t> mOutputIndexSubModelToFromModel;
    // Converts indexes into mOutputsAsSubModelInputs to indexes into
    // main model outputs (these are input and output indexes, not
    // operand indexes).  To be specific, if the main model outputs
    // are mainModelOutputs,
    //
    //     mOutputsAsSubModelInputsIndexToFromModel.size() ==
    //     mOutputsAsSubModelInputs.size()
    //
    // and when (0 <= i < mOutputsAsSubModelInputs.size()),
    //
    //     mainModelOutputs[mOutputsAsSubModelInputsIndexToFromModel[i]] ==
    //     mOutputsAsSubModelInputs[i].first
    std::vector<uint32_t> mOutputsAsSubModelInputsIndexToFromModel;

    // The compilation caching token.
    TokenHasher mToken;
};

class ExecutionPlan {
   public:
    ExecutionPlan(const ExecutionPlan&) = delete;
    ExecutionPlan& operator=(const ExecutionPlan&) = delete;

    ExecutionPlan() {}
    ~ExecutionPlan() { delete mBody; }

    // Controller is part of the interface to a mechanism for
    // performing an execution in N steps.
    //
    // Usage pattern:
    // - Instantiate Controller with ExecutionPlan::makeController().
    // - Call ExecutionPlan::next() on Controller N+1 times.  The first N times,
    //   *executor is set to point to a new StepExecutor corresponding
    //   to that step.  The N+1st time, *executor is set to nullptr,
    //   signifying there are no more steps.
    // - If ExecutionPlan::next() returns anything other than ANEURALNETWORKS_NO_ERROR,
    //   a problem has occurred.
    class Controller {
        friend class ExecutionPlan;

       private:
        Controller(const Controller&) = delete;
        Controller& operator=(const Controller&) = delete;

        // Map from the operand index of a TEMPORARY in the original
        // model to an offset into mTemporaries used to represent that
        // TEMPORARY as an inter-partition input or output.
        typedef std::map<uint32_t, uint32_t> SubModelInputsAndOutputsType;

        static const size_t kBadStepIndex = ~size_t(0);

        Controller(const ExecutionPlan* plan, ExecutionBuilder* executionBuilder,
                   const BurstBuilder* burstBuilder,
                   std::shared_ptr<const SubModelInputsAndOutputsType> subModelInputsAndOutputs,
                   uint32_t totalSizeOfTemporaries);

        const ExecutionPlan* mPlan;
        ExecutionBuilder* mExecutionBuilder;
        const BurstBuilder* mBurstBuilder;
        std::shared_ptr<const SubModelInputsAndOutputsType>
                mSubModelInputsAndOutputs;  // may be nullptr
        std::unique_ptr<MemoryAshmem> mTemporaries;
        size_t mNextStepIndex;
    };

    std::vector<std::shared_ptr<ExecutionBurstController>> makeBursts() const;

    std::shared_ptr<Controller> makeController(ExecutionBuilder* executionBuilder,
                                               const BurstBuilder* burstBuilder) const;

    int next(std::shared_ptr<Controller> controller, std::shared_ptr<StepExecutor>* executor,
             std::shared_ptr<ExecutionBurstController>* burstController = nullptr) const;

    // Create the same executor as the last one created by next().
    int fallback(std::shared_ptr<Controller> controller,
                 std::shared_ptr<StepExecutor>* executor) const;

    std::shared_ptr<ExecutionStep> createNewStep(const std::shared_ptr<Device> device);

    void becomeSingleStep(const std::shared_ptr<Device> device, const ModelBuilder* model);

    int finish(const ModelBuilder* fromModel, int32_t executionPreference);

    void recordTemporaryDef(uint32_t fromModelIndex, uint32_t stepIndex) {
        auto& temporaryToDefiningStep = compound()->mTemporaryToDefiningStep;
        nnAssert(temporaryToDefiningStep.count(fromModelIndex) == 0);
        temporaryToDefiningStep.insert(std::make_pair(fromModelIndex, stepIndex));
    }

    void dump() const;

    void reset();

    bool isValid() const { return mState != EMPTY && mBody != nullptr && mBody->mSuccessfulFinish; }
    bool isSimple() const { return mState == SIMPLE; }
    bool isSimpleCpu() const;

    void setCaching(const std::string* cacheDir, const uint8_t* token) {
        mCacheDir = cacheDir;
        mToken = token;
    }
    const std::string* getCacheDir() const { return mCacheDir; }
    const uint8_t* getCacheToken() const { return mToken; }

    // These functions are solely intended for use by unit tests of
    // the partitioning algorithm.
    enum class Kind {
        ERROR,
        EMPTY,
        SIMPLE,
        COMPOUND
    };  // See operator<< defined outside this class
    Kind forTest_getKind() const;
    std::shared_ptr<const Device> forTest_simpleGetDevice() const;
    const std::vector<std::shared_ptr<ExecutionStep>>& forTest_compoundGetSteps() const;
    bool forTest_hasSubModelOutputsOfUnknownSize() const;
    const uint8_t* forTest_simpleGetCacheToken() const;

   private:
    void findTempsAsSubModelOutputs();

    struct Body {
        virtual ~Body() {}
        virtual void dump() const = 0;
        virtual int finish(const ModelBuilder* fromModel, int32_t executionPreference) = 0;
        virtual bool hasSubModelOutputsOfUnknownSize() const = 0;
        bool mSuccessfulFinish = false;
    };

    struct SimpleBody : Body {
        SimpleBody(std::shared_ptr<Device> device, const ModelBuilder* model,
                   const std::string* cacheDir, const uint8_t* token)
            : mDevice(device), mModel(model), mCacheDir(cacheDir), mToken(token) {}

        void dump() const override;
        int finish(const ModelBuilder* fromModel, int32_t executionPreference) override;
        virtual bool hasSubModelOutputsOfUnknownSize() const override { return false; }

        std::shared_ptr<Device> mDevice;
        const ModelBuilder* mModel;
        std::shared_ptr<PreparedModel> mPreparedModel;

        const std::string* mCacheDir;
        TokenHasher mToken;
    };

    struct CompoundBody : Body {
        void dump() const override;
        int finish(const ModelBuilder* fromModel, int32_t executionPreference) override;
        virtual bool hasSubModelOutputsOfUnknownSize() const override {
            return mHasSubModelOutputOfUnknownSize;
        }

        // TODO: Some of the data is working state information that
        // shouldn't be needed after we've constructed but not
        // executed the plan.

        std::vector<std::shared_ptr<ExecutionStep>> mSteps;

        // Map from original operand index to defining step index.
        // Used for all (and only) TEMPORARY_VARIABLEs.
        std::unordered_map<uint32_t, uint32_t> mTemporaryToDefiningStep;

        bool mHasSubModelOutputOfUnknownSize = false;

       private:
        void findTempsAsSubModelOutputs();
    };

    enum { EMPTY, SIMPLE, COMPOUND } mState = EMPTY;
    Body* mBody = nullptr;
    SimpleBody* simple() {
        CHECK(mState == SIMPLE);
        CHECK(mBody != nullptr);
        return static_cast<SimpleBody*>(mBody);
    }
    const SimpleBody* simple() const {
        CHECK(mState == SIMPLE);
        CHECK(mBody != nullptr);
        return static_cast<const SimpleBody*>(mBody);
    }
    CompoundBody* compound() {
        CHECK(mState == COMPOUND);
        CHECK(mBody != nullptr);
        return static_cast<CompoundBody*>(mBody);
    }
    const CompoundBody* compound() const {
        CHECK(mState == COMPOUND);
        CHECK(mBody != nullptr);
        return static_cast<const CompoundBody*>(mBody);
    }

    // Pointers to compilation caching information in CompilationBuilder.
    const std::string* mCacheDir = nullptr;
    const uint8_t* mToken = nullptr;
};

inline std::ostream& operator<<(std::ostream& out, ExecutionPlan::Kind kind) {
    const int intKind = static_cast<int>(kind);
    if (kind < ExecutionPlan::Kind::ERROR || kind > ExecutionPlan::Kind::COMPOUND) {
        return out << "<UNK(" << intKind << ")>";
    }
    static const char* name[] = {"ERROR", "EMPTY", "SIMPLE", "COMPOUND"};
    return out << name[intKind];
}

}  // namespace nn
}  // namespace android

#endif  // ANDROID_FRAMEWORKS_ML_NN_RUNTIME_EXECUTION_PLAN_H