Series Articles
MNN createFromBuffer (1)
MNN createRuntime (2)
MNN createSession: Schedule (3)
MNN createSession: Creating the Pipeline Backends (4)
MNN Session::resize: Pipeline Encoding (5)
MNN Session: Creating the Executors (6)
Contents
- Series Articles
- 1、createSession
- 1.1 createMultiPathSession
- 1.1.1 The Schedule class: OpCacheInfo, BackendCache, PipelineInfo, ScheduleInfo
- 1.1.1.1 The Backend class and Backend::Info
- 1.1.2 Schedule::schedule
- 1.1.2.1 initConstTensors
- 1.1.2.2 initTensors
- 1.1.2.3 _scheduleUnit
- 1.1.2.3.1 generateScheduleGraph
- 1.1.2.3.2 initPipelineInfosFromOps
- 1.1.2.3.3 The Op operator
- 1.1.2.4 setInputOutputForOps
- 1.1.2.5 GeometryComputerUtils::buildConstantTensors
- 1.1.2.5.1 OpCommonUtils::opNeedContent
- 1.1.3 The Tensor class
- 1.1.3.1 Tensor::InsideDescribe
- 1.1.3.1.1 NativeInsideDescribe
- 1.1.3.1.1.1 RefCount
1. createSession
Create a session from a ScheduleConfig (and optionally a RuntimeInfo). The single-config overload builds the RuntimeInfo itself and forwards to createMultiPathSession:
// source/core/Interpreter.cpp
Session* Interpreter::createSession(const ScheduleConfig& config) {
// createMultiPathSession builds the RuntimeInfo from the ScheduleConfig
return createMultiPathSession({config});
}
Session* Interpreter::createSession(const ScheduleConfig& config, const RuntimeInfo& runtime) {
return createMultiPathSession({config}, runtime);
}
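Before going deeper, a minimal caller-side sketch of the public API shown above (hedged; "model.mnn" is a placeholder path, error handling and interpreter cleanup are omitted):

#include <MNN/Interpreter.hpp>

int main() {
    // Placeholder model path; createFromFile returns nullptr on failure.
    auto net = MNN::Interpreter::createFromFile("model.mnn");
    MNN::ScheduleConfig config;
    config.type      = MNN_FORWARD_CPU; // forward type, see Backend::Info below
    config.numThread = 4;
    // Single-config overload: the RuntimeInfo is created internally.
    MNN::Session* session = net->createSession(config);
    // ... fill inputs via net->getSessionInput(session, nullptr), then:
    net->runSession(session);
    return 0;
}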
1.1 createMultiPathSession
// source/core/Interpreter.cpp
Session* Interpreter::createMultiPathSession(const std::vector<ScheduleConfig>& configs) {
RuntimeInfo runtime = createRuntime(configs);
runtime.second->setExternalFile(mNet->externalFile);
runtime.second->setAllocatorType(mNet->modes.memoryAllocatorType);
if (runtime.first.empty()) {
MNN_ERROR("Runtime not valid for create session\n");
return nullptr;
}
return createMultiPathSession(configs, std::move(runtime));
}
Session* Interpreter::createMultiPathSession(const std::vector<ScheduleConfig>& configs, const RuntimeInfo& runtime) {
if (nullptr == mNet->buffer.get()) {
MNN_ERROR("The model buffer has been released. Can't create session\n");
return nullptr;
}
if (runtime.first.empty()) {
MNN_ERROR("Runtime not valid for create session\n");
return nullptr;
}
std::unique_lock<std::mutex> _l(mNet->lock);
#ifdef MNN_INTERNAL_ENABLED
Timer _timer;
#endif
int cacheMode = 0; // No cache
// 创建 Schedule,并进行初始化
Schedule::ScheduleInfo info;
auto success = Schedule::schedule(info, mNet->net, configs, runtime);
if (!success) {
return nullptr;
}
RuntimeInfo rt = runtime;
bool valid = false;
if (mNet->cacheBuffer.get() != nullptr) {
for (auto iter : rt.first) {
valid = iter.second->onSetCache(mNet->cacheBuffer.get(),
mNet->cacheBuffer.size());
if(!valid) {
iter.second->onSetCache(nullptr, 0);
}
if (valid) {
break;
}
}
if (valid) {
mNet->lastCacheSize = mNet->cacheBuffer.size();
cacheMode = cacheMode | 1; // READ cache
}
}
auto newSession =
std::unique_ptr<Session>(new Session(std::move(info), mNet->modes, std::move(rt)));
if (!newSession->valid()) {
MNN_PRINT("Invalide Session!!\n");
return nullptr;
}
auto result = newSession.get();
auto validForResize = info.validForResize;
if (validForResize && mNet->modes.inputMode == Session_Input_Inside && mNet->modes.resizeMode == Session_Resize_Direct) {
result->resize();
}
if ((!mNet->cacheFile.empty()) && (!valid) && mNet->modes.backendMode == Session_Backend_Fix) {
// Try to save extra cache
auto buffer = result->getCache();
if (buffer.first != nullptr && buffer.second > 0) {
MNN_PRINT("Write cache to %s, size = %zu\n", mNet->cacheFile.c_str(), buffer.second);
writeCacheFile(mNet, buffer);
mNet->lastCacheSize = buffer.second;
// Write Cache
cacheMode = cacheMode | 2;
}
}
// Reset cache
result->loadCache(nullptr, 0);
mNet->sessions.emplace_back(std::move(newSession));
#ifdef MNN_INTERNAL_ENABLED
int precision = BackendConfig::Precision_Normal;
if (nullptr != configs[0].backendConfig) {
precision = configs[0].backendConfig->precision;
}
int mode = configs[0].mode;
mNet->sessionInfo.insert(std::make_pair(result, std::make_tuple(precision, mode)));
if (shouldLog(FREQ_HIGH)) {
std::map<std::string, std::string> metrics = mNet->basicLogginData;
metrics.emplace("UUID", mNet->uuid);
metrics.emplace("Time", std::to_string((float)_timer.durationInUs() / 1024.0f));
metrics.emplace("Backend", std::to_string(configs[0].type));
metrics.emplace("Precision", std::to_string(precision));
metrics.emplace("Mode", std::to_string(mode));
metrics.emplace("Cache", std::to_string(cacheMode));
metrics.emplace("CacheSize", std::to_string((float)(mNet->lastCacheSize / 1024.0f)));
metrics.emplace("ModelSize", std::to_string ((float)mNet->buffer.size() / 1024.0f / 1024.0f));
metrics.emplace("Usage", std::to_string((int) mNet->net->usage()));
metrics.emplace("API", "Interpreter::createMultiPathSession");
logAsync(metrics);
}
#endif // MNN_INTERNAL_ENABLED
return result;
}
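Passing a RuntimeInfo explicitly lets several sessions share one runtime, and hence its allocator and tuning cache. A hedged sketch, assuming config, netA and netB are set up elsewhere as above:

std::vector<MNN::ScheduleConfig> configs{config};
// createRuntime is the entry analyzed in part (2) of this series.
MNN::RuntimeInfo runtime = MNN::Interpreter::createRuntime(configs);
MNN::Session* s1 = netA->createSession(config, runtime);
MNN::Session* s2 = netB->createSession(config, runtime);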
1.1.1 The Schedule class: OpCacheInfo, BackendCache, PipelineInfo, ScheduleInfo
// source/core/Schedule.hpp
/** net scheduler */
class MNN_PUBLIC Schedule {
public:
enum Type {
// Size can be compute separately
SEPARATE = 0,
// When size is fixed, the content is fixed
CONSTANT = 1,
// Size can't be compute separately
NOT_SEPERATE
};
/** pipeline info */
struct OpCacheInfo {
/** op */
const Op* op;
/** input tensors */
std::vector<Tensor*> inputs;
/** output tensors */
std::vector<Tensor*> outputs;
/** schedule type*/
Schedule::Type type = Schedule::Type::SEPARATE;
/**Command buffer for cache*/
CommandBuffer cacheBuffer;
/**Command buffer for execute*/
CommandBuffer executeBuffer;
std::map<const Op*, std::shared_ptr<Execution>> executionCache;
};
// Backend, Tensor, shape-dirty, content-dirty
typedef std::tuple<Tensor*, std::shared_ptr<Tensor>, bool, bool> TENSORCACHE;
struct BackendCache {
Backend::Info info;
BackendConfig config;
std::pair<std::shared_ptr<Backend>, std::shared_ptr<Backend>> cache;
bool needComputeShape = true;
bool needComputeGeometry = true;
bool reportError = true;
std::map<Tensor*, TENSORCACHE> inputTensorCopyCache;
};
typedef std::pair<BackendCache, std::vector<OpCacheInfo>> PipelineInfo;
/** schedule info */
struct ScheduleInfo {
/** pipelines with backend info */
std::vector<PipelineInfo> pipelineInfo;
/** input tensors map */
std::map<std::string, Tensor*> inputTensors;
/** output tensors map */
std::map<std::string, Tensor*> outputTensor;
/** all tensors */
std::vector<std::shared_ptr<Tensor>> allTensors;
/** input valid for resize*/
bool validForResize;
/** Default Backend for alloc const*/
std::shared_ptr<Backend> defaultBackend;
/** Replace Backend for alloc const*/
std::shared_ptr<Backend> constReplaceBackend;
/** size need input's content*/
bool needInputContentForShape = false;
};
/**
* @brief schedule net ops to pipeline with configuration.
* @param net given net.
* @param config given configuration.
* @return schedule info.
*/
static bool schedule(ScheduleInfo& result, const Net* net, const std::vector<ScheduleConfig>& config, const RuntimeInfo& runtimeInfo);
static MNNForwardType getApprociateType(const ScheduleConfig& config);
};
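To make the nesting concrete: a ScheduleInfo holds one PipelineInfo per ScheduleConfig, and each PipelineInfo pairs a BackendCache with the ops scheduled onto that backend. A small illustrative walk (a sketch, assuming the internal core headers are on the include path):

#include "core/Schedule.hpp" // internal header, path as in the MNN source tree

static void dumpScheduleInfo(const MNN::Schedule::ScheduleInfo& info) {
    for (auto& pipeline : info.pipelineInfo) {
        // pipeline.first  : BackendCache (forward type, thread count, caches)
        // pipeline.second : std::vector<OpCacheInfo>, the ops on this backend
        for (auto& opInfo : pipeline.second) {
            // opInfo.op / opInfo.inputs / opInfo.outputs
            (void)opInfo;
        }
    }
}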
1.1.1.1 The Backend class and Backend::Info
// source/core/Backend.hpp
class Backend : public NonCopyable {
public:
/** info used to create backend */
struct Info {
/** forward type. */
MNNForwardType type = MNN_FORWARD_CPU;
/** numThread for CPU . number of threads. gpuMode for GPU only. tuning/memory Mode setting. */
union {
int numThread = 4;
int gpuMode;
};
/** user data. */
BackendConfig* user = NULL;
enum Mode {
// The Op will be run in execution->onExecute
DIRECT = 0,
// The Op will be recorded. Run in onExecuteBegin and Wait in onExecuteEnd
INDIRECT = 1
};
Mode mode = DIRECT;
enum Allocator {
DEFER = 0,
EAGER = 1
};
Allocator allocator = DEFER;
};
/** backend buffer storage type */
enum StorageType {
/**
use NOT reusable memory.
- allocates memory when `onAcquireBuffer` is called.
- releases memory when `onReleaseBuffer` is called or when the backend is deleted.
- do NOTHING when `onClearBuffer` is called.
*/
STATIC,
/**
use reusable memory.
- allocates or reuses memory when `onAcquireBuffer` is called. prefers reusing.
- collects memory for reuse when `onReleaseBuffer` is called.
- releases memory when `onClearBuffer` is called or when the backend is deleted.
*/
DYNAMIC,
/**
use NOT reusable memory.
- allocates memory when `onAcquireBuffer` is called.
- do NOTHING when `onReleaseBuffer` is called.
- releases memory when `onClearBuffer` is called or when the backend is deleted.
*/
DYNAMIC_SEPERATE
};
public:
/**
* @brief initializer.
* @param type forward type.
*/
Backend(MNNForwardType type) : mType(type) {
// nothing to do
}
/**
* @brief deinitializer.
*/
virtual ~Backend() = default;
public:
/**
* @brief create execution for op with input and output tensors.
* @param inputs input tensors.
* @param outputs output tensors.
* @param op given op.
* @return created execution if op is supported, nullptr otherwise.
*/
virtual Execution* onCreate(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
const MNN::Op* op) = 0;
/**
* @brief callback before resize ops.
*/
virtual void onResizeBegin() {
// nothing to do
}
/**
* @brief callback after resize ops.
*/
virtual ErrorCode onResizeEnd() = 0;
/**
* @brief callback before executing ops.
*/
virtual void onExecuteBegin() const = 0;
/**
* @brief callback after executing ops.
*/
virtual void onExecuteEnd() const = 0;
virtual const Runtime* getRuntime() {
return nullptr;
}
const std::string externalFile();
public:
/**
* @brief allocate buffer of tensor for given storage type.
* @param tensor buffer provider.
* @param storageType buffer storage type.
* @return success or not.
*/
MNN_PUBLIC bool onAcquireBuffer(const Tensor* tensor, StorageType storageType);
/**
* @brief release buffer of tensor for given storage type.
* @param tensor buffer provider.
* @param storageType buffer storage type.
* @return success or not.
*/
MNN_PUBLIC bool onReleaseBuffer(const Tensor* tensor, StorageType storageType);
class MemObj {
public:
MemObj() {}
virtual ~ MemObj() {}
virtual MemChunk chunk() { return MemChunk(); }
};
/**
* @brief allocate buffer of tensor for given storage type.
* @param tensor buffer provider.
* @param storageType buffer storage type.
* @return MemObj for release, if failed, return nullptr.
*/
virtual MemObj* onAcquire(const Tensor* tensor, StorageType storageType) = 0;
/**
* @brief get buffer from tensor directly
* @param tensor buffer provider.
* @return support or not
*/
virtual bool onGetTensorInfo(const Tensor* tensor, void* dstInfo) {
return false;
}
/**
* @brief clear all dynamic buffers.
* @return success or not.
*/
virtual bool onClearBuffer() = 0;
/**
* @brief copy buffer from tensor to tensor.
* @param srcTensor source buffer provider.
* @param dstTensor dest buffer provider.
*/
virtual void onCopyBuffer(const Tensor* srcTensor, const Tensor* dstTensor) const = 0;
public:
/**
* @brief get forward type.
* @return forward type.
*/
inline MNNForwardType type() const {
return mType;
}
public:
/**
* @brief get Gpu Tensor map host ptr/ unmap
*/
virtual void* onMapTensor(Tensor::MapType mtype, Tensor::DimensionType dtype, const Tensor* srcTensor) {
return nullptr;
}
virtual bool onUnmapTensor(Tensor::MapType mtype, Tensor::DimensionType dtype, const Tensor* dstTensor, void* mapPtr) {
return false;
}
virtual int onSync(Tensor::MapType mtype, bool toCpu, const Tensor* dstTensor) {
return 0;
}
private:
const MNNForwardType mType;
};
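Backend::Info is filled from the user-facing ScheduleConfig in Schedule::schedule (shown next): config.type becomes info.type, config.numThread feeds the numThread/gpuMode union, and config.backendConfig becomes the user pointer. A hedged configuration sketch:

MNN::BackendConfig bnConfig;
bnConfig.precision = MNN::BackendConfig::Precision_Low; // e.g. allow fp16 where supported
bnConfig.power     = MNN::BackendConfig::Power_High;
bnConfig.memory    = MNN::BackendConfig::Memory_Normal;

MNN::ScheduleConfig config;
config.type          = MNN_FORWARD_CPU; // -> Backend::Info::type
config.numThread     = 4;               // -> Backend::Info::numThread (gpuMode on GPU)
config.backendConfig = &bnConfig;       // -> Backend::Info::user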
1.1.2 Schedule::schedule
// source/core/Schedule.cpp
bool Schedule::schedule(ScheduleInfo& scheduleInfo, const Net* net, const std::vector<ScheduleConfig>& configs, const RuntimeInfo& runtimeInfo) {
if (nullptr == net->oplists()) {
MNN_PRINT("Empty net for schedule\n");
return false;
}
if (scheduleInfo.defaultBackend.get() == nullptr && scheduleInfo.allTensors.empty()) {
// Const not init, init it
BackendConfig defaultConfig;
defaultConfig.flags = 4;
// Create the default backend, i.e. CPUBackend (source/backend/cpu/CPUBackend.cpp)
scheduleInfo.defaultBackend.reset(runtimeInfo.second->onCreate(&defaultConfig));
ErrorCode code = NO_ERROR;
initConstTensors(scheduleInfo.allTensors, net, scheduleInfo.defaultBackend.get(), code);
if (NO_ERROR != code) {
MNN_ERROR("Schedule Const init errorcode = %d\n", code);
return false;
}
}
bool valid = initTensors(scheduleInfo.allTensors, net);
scheduleInfo.validForResize = valid;
std::vector<std::shared_ptr<Tensor>>& allTensors = scheduleInfo.allTensors;
std::vector<std::pair<Schedule::BackendCache, std::vector<Schedule::OpCacheInfo>>> result;
for (auto& config : configs) {
Backend::Info compute;
compute.type = getApprociateType(config);
compute.numThread = config.numThread;
if(config.type == MNN_FORWARD_AUTO) {
if(compute.type == MNN_FORWARD_OPENCL || compute.type == MNN_FORWARD_METAL) {
// AUTO set default gpu-mode MNN_GPU_TUNING_FAST
compute.numThread = 16;
}
}
compute.user = config.backendConfig;
// Initialize the ops and tensors for this config
auto oplists = _scheduleUnit(net, config, allTensors);
Schedule::BackendCache cache;
cache.info = std::move(compute);
result.emplace_back(std::make_pair(cache, std::move(oplists)));
}
scheduleInfo.pipelineInfo = std::move(result);
// get all used op's output, drop unused op, won't change op order. always insert all Input Ops
std::vector<const Op*> oplists;
{
for (std::pair<Schedule::BackendCache, vector<Schedule::OpCacheInfo>>& pipeline : scheduleInfo.pipelineInfo) {
for (auto& info : pipeline.second) {
oplists.push_back(info.op);
}
}
}
// set tensors' input/output usage by oplists info
setInputOutputForOps(allTensors, oplists, net->usage() == Usage_INFERENCE_STATIC);
// add output index by config info and outputName
std::unordered_map<std::string, int> tensorNameIndexMap;
for (int i = 0; i < net->tensorName()->size(); ++i) {
tensorNameIndexMap[net->tensorName()->Get(i)->str()] = i;
}
bool userSetOutput = false;
// Collect output tensors requested via config.saveTensors
for (auto& config : configs) {
userSetOutput = userSetOutput || (!config.saveTensors.empty());
for (const auto& name : config.saveTensors) {
auto iter = tensorNameIndexMap.find(name);
if (iter != tensorNameIndexMap.end()) {
auto t = allTensors[iter->second].get();
if (TensorUtils::getDescribe(t)->usage == Tensor::InsideDescribe::NORMAL) {
TensorUtils::getDescribe(t)->usage = Tensor::InsideDescribe::OUTPUT;
}
scheduleInfo.outputTensor.insert(
std::make_pair(net->tensorName()->GetAsString(iter->second)->c_str(), t));
} else {
MNN_PRINT("Bad outputname: %s\n", name.c_str());
}
}
}
// Collect output tensors declared by the model (net->outputName())
if (net->outputName()) {
userSetOutput = userSetOutput || net->outputName()->size() >= 1;
for (int i = 0; i < net->outputName()->size(); ++i) {
std::string name = net->outputName()->Get(i)->str();
auto iter = tensorNameIndexMap.find(name);
if (iter != tensorNameIndexMap.end()) {
auto t = allTensors[iter->second].get();
if (TensorUtils::getDescribe(t)->usage == Tensor::InsideDescribe::NORMAL) {
TensorUtils::getDescribe(t)->usage = Tensor::InsideDescribe::OUTPUT;
}
scheduleInfo.outputTensor.insert(
std::make_pair(net->tensorName()->GetAsString(iter->second)->c_str(), t));
}
}
}
if (scheduleInfo.outputTensor.empty()) {
userSetOutput = false;
}
// add input/output tensor to schedule's input/output
for (int index = 0; index < allTensors.size(); index++) {
auto t = allTensors[index].get();
auto usage = TensorUtils::getDescribe(t)->usage;
if (usage == Tensor::InsideDescribe::INPUT) {
// e.g. inputTensors ends up with one entry for this model
scheduleInfo.inputTensors.insert(std::make_pair(net->tensorName()->GetAsString(index)->c_str(), t));
}
if (usage == Tensor::InsideDescribe::OUTPUT && (!userSetOutput)) {
// e.g. outputTensor ends up with three entries for this model
scheduleInfo.outputTensor.insert(
std::make_pair(net->tensorName()->GetAsString(index)->c_str(), t));
}
}
if (net->usage() == Usage_INFERENCE_STATIC) {
for (auto& pipInfo : scheduleInfo.pipelineInfo) {
pipInfo.first.needComputeGeometry = false;
pipInfo.first.needComputeShape = false;
}
}
#ifndef MNN_BUILD_MINI
for (auto iter = scheduleInfo.pipelineInfo.begin(); iter != scheduleInfo.pipelineInfo.end();) {
if (!iter->first.needComputeGeometry) {
// For static model don't need check const
iter++;
continue;
}
auto breakIndex = GeometryComputerUtils::buildConstantTensors(iter->second);
if (breakIndex >= 0) {
scheduleInfo.needInputContentForShape = true;
}
#ifdef MNN_SEPERTE_SIZE
if (breakIndex >= 0 && (breakIndex + 1) < iter->second.size()) {
// Split oplist
std::vector<Schedule::PipelineInfo> fuse;
std::vector<Schedule::PipelineInfo> separate;
fuse.insert(fuse.begin(), iter->second.begin(), iter->second.begin() + breakIndex + 1);
separate.insert(separate.begin(), iter->second.begin() + breakIndex + 1, iter->second.end());
oplists.clear();
iter->second = std::move(separate);
iter = scheduleInfo.pipelineInfo.insert(iter, std::make_pair(iter->first, fuse));
iter++;
iter++;
} else {
iter++;
}
#else
iter++;
#endif
}
#endif
return true;
}
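The saveTensors branch above is how a caller keeps an intermediate tensor alive as an extra output. A hedged sketch ("feature_map" is a placeholder that must match an entry in the model's tensor-name table):

MNN::ScheduleConfig config;
config.type = MNN_FORWARD_CPU;
config.saveTensors = {"feature_map"}; // placeholder tensor name
MNN::Session* session = net->createSession(config);
// The saved tensor is now reachable like any model output.
MNN::Tensor* feat = net->getSessionOutput(session, "feature_map");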
1.1.2.1 initConstTensors
// source/utils/InitNet.cpp
bool initConstTensors(std::vector<std::shared_ptr<Tensor>>& tensors, const Net* net, Backend* defaultBackend, ErrorCode& code) {
bool valid = true;
// Allocate one slot per tensor (net->tensorName()->size()), e.g. 208
tensors.resize(net->tensorName()->size());
// Set up const
// e.g. net->oplists()->size() == 208 ops for this model
for (int opIndex = 0; opIndex < net->oplists()->size(); ++opIndex) {
auto op = net->oplists()->GetAs<Op>(opIndex);
// Initialize Const ops and trainable parameters
if (OpType_Const == op->type() || OpType_TrainableParam == op->type()) {
MNN_ASSERT(nullptr != op->outputIndexes());
auto index = op->outputIndexes()->data()[0];
tensors[index].reset(new Tensor);
TensorUtils::getDescribe(tensors[index].get())->index = index;
auto parameter = op->main_as_Blob();
auto output = tensors[index].get();
bool zeroShape = false;
if (parameter->dims() != nullptr) {
output->buffer().dimensions = parameter->dims()->size();
for (int i = 0; i < output->buffer().dimensions; i++) {
output->buffer().dim[i].extent = parameter->dims()->Get(i);
if (output->length(i) <= 0) {
zeroShape = true;
}
}
} else {
output->buffer().dimensions = 0;
}
if (parameter->dataType() == DataType_DT_HALF) {
output->setType(DataType_DT_FLOAT);
} else {
output->setType(parameter->dataType());
}
TensorUtils::getDescribe(output)->dimensionFormat = parameter->dataFormat();
TensorUtils::getDescribe(output)->usage = Tensor::InsideDescribe::CONSTANT;
TensorUtils::getDescribe(output)->isMutable = false;
if (op->type() == OpType_TrainableParam) {
TensorUtils::getDescribe(output)->usage = Tensor::InsideDescribe::TRAINABLE;
}
TensorUtils::setLinearLayout(output);
TensorUtils::getDescribe(output)->setBackend(defaultBackend);
//MNN_PRINT("Const tensor %p is %p bn\n", output, defaultBackend);
if (zeroShape) {
continue;
}
auto res = defaultBackend->onAcquireBuffer(output, Backend::STATIC);
if (!res) {
code = OUT_OF_MEMORY;
return false;
}
if (parameter->dataType() == DataType_DT_HALF) {
if (nullptr == parameter->uint8s()) {
// Error half const
code = INVALID_VALUE;
return false;
}
auto outputPtr = output->host<float>();
auto size = output->elementSize();
half_float::half* src = nullptr;
std::unique_ptr<half_float::half[]> tmp;
if (USE_EXTERNAL_DATA(parameter)) {
tmp.reset((new half_float::half[size]));
src = tmp.get();
OpCommonUtils::loadExternalDatas(defaultBackend, {reinterpret_cast<char*>(src)}, parameter->external()->data());
} else {
src = (half_float::half*)parameter->uint8s()->data();
}
for (int i=0; i<size; ++i) {
outputPtr[i] = src[i];
}
} else {
OpCommonUtils::loadBlobData(defaultBackend, op, output->host<char>(), output->size());
}
} else {
if (nullptr != op->outputIndexes()) {
for (int i=0; i<op->outputIndexes()->size(); ++i) {
auto index = op->outputIndexes()->data()[i];
if (nullptr == tensors[index].get()) {
continue;
}
auto des = TensorUtils::getDescribe(tensors[index].get());
if (des->usage == Tensor::InsideDescribe::CONSTANT) {
des->usage = Tensor::InsideDescribe::TRAINABLE;
}
}
}
}
}
return valid;
}
1.1.2.2 initTensors
Initialize every tensor that initConstTensors left empty, restore quant metadata from extraTensorDescribe, and set up input shapes:
// source/utils/InitNet.cpp
bool initTensors(std::vector<std::shared_ptr<Tensor>>& tensors, const Net* net) {
bool valid = true;
auto describes = net->extraTensorDescribe();
std::vector<const TensorDescribe*> des(tensors.size());
for (int i=0; i<tensors.size(); ++i) {
// Init all tensor except for const
if (tensors[i].get() == nullptr) {
tensors[i].reset(new Tensor);
TensorUtils::getDescribe(tensors[i].get())->index = i;
// MNN_PRINT("initTensors create tensor:%p, index:%d, backend:%d\n", tensors[i].get(), i, TensorUtils::getDescribe(tensors[i].get())->backend);
}
}
if (describes) {
for (int i = 0; i < describes->size(); i++) {
int index = describes->GetAs<TensorDescribe>(i)->index();
des[index] = describes->GetAs<TensorDescribe>(i);
}
}
for (int i = 0; i < tensors.size(); ++i) {
if (des[i] != nullptr && des[i]->quantInfo()) {
TensorUtils::getDescribe(tensors[i].get())->quantAttr.reset(new QuantAttr);
auto quant = TensorUtils::getDescribe(tensors[i].get())->quantAttr.get();
quant->scale = des[i]->quantInfo()->scale();
quant->zero = des[i]->quantInfo()->zero();
quant->min = des[i]->quantInfo()->min();
quant->max = des[i]->quantInfo()->max();
// Don't copy datatype, it can be set by backend
}
}
// Set Input Tensor, if the type of input is not the same with ExtraTensorDescribe, use input parameter
for (int opIndex = 0; opIndex < net->oplists()->size(); ++opIndex) {
auto op = net->oplists()->GetAs<Op>(opIndex);
if (OpType_Input == op->type()) {
MNN_ASSERT(nullptr != op->outputIndexes());
MNN_ASSERT(op->outputIndexes()->size() == 1);
auto index = op->outputIndexes()->data()[0];
auto tensor = tensors[index].get();
auto& tb = tensor->buffer();
auto inputParam = op->main_as_Input();
if (auto idims = inputParam->dims()) {
for (int i = 0; i < idims->size(); ++i) {
int extent = idims->data()[i];
// dim-0 is batch(when input batch is -1, set it to be 1, ignore other dim)
if (i == 0 && extent == -1) {
extent = 1;
}
if (extent < 0) {
valid = false;
}
tb.dim[i].extent = extent;
}
tb.dimensions = idims->size();
} else {
tb.dimensions = 0;
}
tensor->setType(inputParam->dtype());
TensorUtils::getDescribe(tensor)->dimensionFormat = inputParam->dformat();
TensorUtils::setLinearLayout(tensor);
}
}
if (net->usage() != Usage_INFERENCE_STATIC) {
return valid;
}
// static model will set all tensors' shape
for (int i = 0; i < describes->size(); i++) {
int index = describes->GetAs<TensorDescribe>(i)->index();
des[index] = describes->GetAs<TensorDescribe>(i);
}
for (int i = 0; i < tensors.size(); ++i) {
if (TensorUtils::getDescribe(tensors[i].get())->usage != Tensor::InsideDescribe::NORMAL) {
// Const / Trainable Shape has been inited
continue;
}
auto blob = des[i]->blob();
auto& tb = tensors[i]->buffer();
if (auto idims = blob->dims()) {
for (int d = 0; d < idims->size(); d++) {
tb.dim[d].extent = idims->Get(d);
}
tb.dimensions = idims->size();
} else {
tb.dimensions = 0;
}
tensors[i]->setType(blob->dataType());
}
for (int i = 0; i < tensors.size(); ++i) {
auto blob = des[i]->blob();
TensorUtils::getDescribe(tensors[i].get())->dimensionFormat = blob->dataFormat();
if (auto regions = des[i]->regions()) {
auto& regs = TensorUtils::getDescribe(tensors[i].get())->regions;
TensorUtils::getDescribe(tensors[i].get())->memoryType = Tensor::InsideDescribe::MEMORY_BACKEND;
regs.reserve(regions->size());
for (int r = 0; r < regions->size(); r++) {
auto region = regions->GetAs<Region>(r);
Tensor::InsideDescribe::Region reg;
reg.origin = tensors[region->origin()].get();
reg.src.offset = region->src()->offset();
reg.dst.offset = region->dst()->offset();
for (int d = 0; d < 3; d++) {
reg.size[d] = region->size()->data()[d];
reg.src.stride[d] = region->src()->stride()->data()[d];
reg.dst.stride[d] = region->dst()->stride()->data()[d];
}
regs.emplace_back(std::move(reg));
}
}
}
return valid;
}
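The regions restored at the end of initTensors describe 3-level strided copies out of an origin tensor. A standalone sketch of their semantics (illustrative, not MNN code; the field layout mirrors Tensor::InsideDescribe::Region shown in 1.1.3.1):

struct View   { int offset = 0; int stride[3] = {1, 1, 1}; };
struct Region { View src; View dst; int size[3] = {1, 1, 1}; };

// Interpret one region as three nested strided loops over float buffers.
void applyRegion(const Region& r, const float* src, float* dst) {
    for (int i = 0; i < r.size[0]; ++i)
        for (int j = 0; j < r.size[1]; ++j)
            for (int k = 0; k < r.size[2]; ++k)
                dst[r.dst.offset + i * r.dst.stride[0] + j * r.dst.stride[1] + k * r.dst.stride[2]] =
                    src[r.src.offset + i * r.src.stride[0] + j * r.src.stride[1] + k * r.src.stride[2]];
}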
1.1.2.3 _scheduleUnit
// source/core/Schedule.cpp
static vector<Schedule::OpCacheInfo> _scheduleUnit(const Net* net, const ScheduleConfig& configs,
const vector<shared_ptr<Tensor>>& allTensors) {
vector<Schedule::OpCacheInfo> oplists;
vector<const Op*> ops;
generateScheduleGraph(ops, net, configs, allTensors);
initPipelineInfosFromOps(oplists, ops, allTensors);
return oplists;
}
1.1.2.3.1 generateScheduleGraph
Generate the schedule graph: either the whole linear op list, or a sub-graph masked by ScheduleConfig::Path.
// source/core/Schedule.cpp
static void generateScheduleGraph(vector<const Op*>& ops, const Net* net, const ScheduleConfig& configs,
const vector<shared_ptr<Tensor>>& allTensors) {
// for (int i = 0; i < net->oplists()->size(); ++i) {
// auto op = net->oplists()->Get(i);
// MNN_PRINT("generateScheduleGraph, op type:%s, op name:%s\n", EnumNameOpType(op->type()), op->name()->c_str());
// }
if (configs.path.inputs.empty() && configs.path.outputs.empty()) {
// Use Default Linear schedule
ops.clear();
ops.reserve(net->oplists()->size());
// Collect every op, e.g. 208
for (int i = 0; i < net->oplists()->size(); ++i) {
auto op = net->oplists()->GetAs<Op>(i);
ops.emplace_back(op);
}
return;
}
// 0: not set, 1: output, 2:input
std::vector<int> tensorMask(net->tensorName()->size());
::memset(tensorMask.data(), 0, tensorMask.size() * sizeof(int));
// 0: unused, 1: used
std::vector<int> opMask(net->oplists()->size());
::memset(opMask.data(), 0, opMask.size() * sizeof(int));
// Set Initial Status
std::set<std::string> inputNames;
std::set<std::string> outputNames;
for (auto& n : configs.path.inputs) {
inputNames.insert(n);
}
for (auto& n : configs.path.outputs) {
outputNames.insert(n);
}
if (configs.path.mode == ScheduleConfig::Path::Mode::Tensor) {
for (int i=0; i<tensorMask.size(); ++i) {
auto name = net->tensorName()->GetAsString(i)->c_str();
if (outputNames.find(name) != outputNames.end()) {
tensorMask[i] = 1;
}
// If both input/output, set as input
if (inputNames.find(name) != inputNames.end()) {
tensorMask[i] = 2;
}
}
} else {
// Op Mode
for (int i=0; i<opMask.size(); ++i) {
auto op = net->oplists()->GetAs<Op>(i);
if (nullptr == op->name()) {
continue;
}
auto name = op->name()->c_str();
if (outputNames.find(name) != outputNames.end()) {
opMask[i] = 1;
if (nullptr != op->outputIndexes()) {
for (int j=0; j<op->outputIndexes()->size(); ++j) {
auto index = op->outputIndexes()->data()[j];
if (tensorMask[index] != 2) {
tensorMask[index] = 1;
}
}
}
if (nullptr != op->inputIndexes()) {
for (int j=0; j<op->inputIndexes()->size(); ++j) {
auto index = op->inputIndexes()->data()[j];
if (tensorMask[index] != 2) {
tensorMask[index] = 1;
}
}
}
}
if (inputNames.find(name) != inputNames.end()) {
opMask[i] = 1;
if (nullptr != op->outputIndexes()) {
for (int j=0; j<op->outputIndexes()->size(); ++j) {
auto index = op->outputIndexes()->data()[j];
tensorMask[index] = 2;
}
}
}
}
}
bool change = false;
do {
change = false;
for (int i=0; i<opMask.size(); ++i) {
if (opMask[i] > 0) {
continue;
}
auto op = net->oplists()->GetAs<Op>(i);
if (nullptr != op->outputIndexes()) {
for (int j=0; j<op->outputIndexes()->size(); ++j) {
auto index = op->outputIndexes()->data()[j];
if (tensorMask[index] == 1) {
opMask[i] = 1;
change = true;
}
}
}
if (nullptr != op->inputIndexes() && opMask[i]) {
for (int j=0; j<op->inputIndexes()->size(); ++j) {
auto index = op->inputIndexes()->data()[j];
if (tensorMask[index] != 2) {
tensorMask[index] = 1;
}
}
}
}
} while (change);
for (int i=0; i<opMask.size(); ++i) {
if (opMask[i] > 0) {
ops.emplace_back(net->oplists()->GetAs<Op>(i));
}
}
}
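The masks above are driven by ScheduleConfig::Path, which lets a caller schedule only a sub-graph. A hedged sketch (tensor names are placeholders):

MNN::ScheduleConfig config;
config.type = MNN_FORWARD_CPU;
// Run only the ops between these two tensors (placeholder names).
config.path.inputs  = {"conv1_out"};
config.path.outputs = {"fc_in"};
config.path.mode    = MNN::ScheduleConfig::Path::Mode::Tensor; // match by tensor name
MNN::Session* partial = net->createSession(config);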
1.1.2.3.2 initPipelineInfosFromOps
// source/utils/InitNet.cpp
void initPipelineInfosFromOps(std::vector<Schedule::OpCacheInfo>& infos, std::vector<const Op*>& ops, const std::vector<std::shared_ptr<Tensor>>& allTensors) {
for (const Op* op : ops) {
// MNN_PRINT("initPipelineInfosFromOps, op type:%s, op name:%s\n", EnumNameOpType(op->type()), op->name()->c_str());
// Cache info for this op
Schedule::OpCacheInfo opInfo;
opInfo.op = op;
if (nullptr != op->outputIndexes()) {
auto data = op->outputIndexes()->data();
for (int j = 0; j < op->outputIndexes()->size(); ++j) {
// Record the op's output tensors
opInfo.outputs.push_back(allTensors[data[j]].get());
}
}
if (nullptr != op->inputIndexes()) {
auto data = op->inputIndexes()->data();
for (int j = 0; j < op->inputIndexes()->size(); ++j) {
// Record the op's input tensors
opInfo.inputs.push_back(allTensors[data[j]].get());
}
}
if (needComputeOp(op)) {
infos.emplace_back(std::move(opInfo));
}
}
}
1.1.2.3.3 The Op operator
// schema/current/MNN_generated.h
struct Op FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
typedef OpT NativeTableType;
static const flatbuffers::TypeTable *MiniReflectTypeTable() {
return OpTypeTable();
}
const flatbuffers::Vector<int32_t> *inputIndexes() const {
return GetPointer<const flatbuffers::Vector<int32_t> *>(4);
}
OpParameter main_type() const {
return static_cast<OpParameter>(GetField<uint8_t>(6, 0));
}
const void *main() const {
return GetPointer<const void *>(8);
}
template<typename T> const T *main_as() const;
const QuantizedAdd *main_as_QuantizedAdd() const {
return main_type() == OpParameter_QuantizedAdd ? static_cast<const QuantizedAdd *>(main()) : nullptr;
}
const ArgMax *main_as_ArgMax() const {
return main_type() == OpParameter_ArgMax ? static_cast<const ArgMax *>(main()) : nullptr;
}
const AsString *main_as_AsString() const {
return main_type() == OpParameter_AsString ? static_cast<const AsString *>(main()) : nullptr;
}
const Axis *main_as_Axis() const {
return main_type() == OpParameter_Axis ? static_cast<const Axis *>(main()) : nullptr;
}
const BatchNorm *main_as_BatchNorm() const {
return main_type() == OpParameter_BatchNorm ? static_cast<const BatchNorm *>(main()) : nullptr;
}
const BinaryOp *main_as_BinaryOp() const {
return main_type() == OpParameter_BinaryOp ? static_cast<const BinaryOp *>(main()) : nullptr;
}
const Blob *main_as_Blob() const {
return main_type() == OpParameter_Blob ? static_cast<const Blob *>(main()) : nullptr;
}
const CastParam *main_as_CastParam() const {
return main_type() == OpParameter_CastParam ? static_cast<const CastParam *>(main()) : nullptr;
}
const Convolution2D *main_as_Convolution2D() const {
return main_type() == OpParameter_Convolution2D ? static_cast<const Convolution2D *>(main()) : nullptr;
}
const Crop *main_as_Crop() const {
return main_type() == OpParameter_Crop ? static_cast<const Crop *>(main()) : nullptr;
}
const CropAndResize *main_as_CropAndResize() const {
return main_type() == OpParameter_CropAndResize ? static_cast<const CropAndResize *>(main()) : nullptr;
}
const Dequantize *main_as_Dequantize() const {
return main_type() == OpParameter_Dequantize ? static_cast<const Dequantize *>(main()) : nullptr;
}
const DetectionOutput *main_as_DetectionOutput() const {
return main_type() == OpParameter_DetectionOutput ? static_cast<const DetectionOutput *>(main()) : nullptr;
}
const Eltwise *main_as_Eltwise() const {
return main_type() == OpParameter_Eltwise ? static_cast<const Eltwise *>(main()) : nullptr;
}
const ExpandDims *main_as_ExpandDims() const {
return main_type() == OpParameter_ExpandDims ? static_cast<const ExpandDims *>(main()) : nullptr;
}
const Fill *main_as_Fill() const {
return main_type() == OpParameter_Fill ? static_cast<const Fill *>(main()) : nullptr;
}
const Flatten *main_as_Flatten() const {
return main_type() == OpParameter_Flatten ? static_cast<const Flatten *>(main()) : nullptr;
}
const Gather *main_as_Gather() const {
return main_type() == OpParameter_Gather ? static_cast<const Gather *>(main()) : nullptr;
}
const GatherV2 *main_as_GatherV2() const {
return main_type() == OpParameter_GatherV2 ? static_cast<const GatherV2 *>(main()) : nullptr;
}
const InnerProduct *main_as_InnerProduct() const {
return main_type() == OpParameter_InnerProduct ? static_cast<const InnerProduct *>(main()) : nullptr;
}
const Input *main_as_Input() const {
return main_type() == OpParameter_Input ? static_cast<const Input *>(main()) : nullptr;
}
const Interp *main_as_Interp() const {
return main_type() == OpParameter_Interp ? static_cast<const Interp *>(main()) : nullptr;
}
const LRN *main_as_LRN() const {
return main_type() == OpParameter_LRN ? static_cast<const LRN *>(main()) : nullptr;
}
const LSTM *main_as_LSTM() const {
return main_type() == OpParameter_LSTM ? static_cast<const LSTM *>(main()) : nullptr;
}
const MatMul *main_as_MatMul() const {
return main_type() == OpParameter_MatMul ? static_cast<const MatMul *>(main()) : nullptr;
}
const NonMaxSuppressionV2 *main_as_NonMaxSuppressionV2() const {
return main_type() == OpParameter_NonMaxSuppressionV2 ? static_cast<const NonMaxSuppressionV2 *>(main()) : nullptr;
}
const Normalize *main_as_Normalize() const {
return main_type() == OpParameter_Normalize ? static_cast<const Normalize *>(main()) : nullptr;
}
const PackParam *main_as_PackParam() const {
return main_type() == OpParameter_PackParam ? static_cast<const PackParam *>(main()) : nullptr;
}
const Permute *main_as_Permute() const {
return main_type() == OpParameter_Permute ? static_cast<const Permute *>(main()) : nullptr;
}
const Plugin *main_as_Plugin() const {
return main_type() == OpParameter_Plugin ? static_cast<const Plugin *>(main()) : nullptr;
}
const Pool *main_as_Pool() const {
return main_type() == OpParameter_Pool ? static_cast<const Pool *>(main()) : nullptr;
}
const PRelu *main_as_PRelu() const {
return main_type() == OpParameter_PRelu ? static_cast<const PRelu *>(main()) : nullptr;
}
const PriorBox *main_as_PriorBox() const {
return main_type() == OpParameter_PriorBox ? static_cast<const PriorBox *>(main()) : nullptr;
}
const Proposal *main_as_Proposal() const {
return main_type() == OpParameter_Proposal ? static_cast<const Proposal *>(main()) : nullptr;
}
const QuantizedAvgPool *main_as_QuantizedAvgPool() const {
return main_type() == OpParameter_QuantizedAvgPool ? static_cast<const QuantizedAvgPool *>(main()) : nullptr;
}
const QuantizedBiasAdd *main_as_QuantizedBiasAdd() const {
return main_type() == OpParameter_QuantizedBiasAdd ? static_cast<const QuantizedBiasAdd *>(main()) : nullptr;
}
const QuantizedConcat *main_as_QuantizedConcat() const {
return main_type() == OpParameter_QuantizedConcat ? static_cast<const QuantizedConcat *>(main()) : nullptr;
}
const QuantizedLogistic *main_as_QuantizedLogistic() const {
return main_type() == OpParameter_QuantizedLogistic ? static_cast<const QuantizedLogistic *>(main()) : nullptr;
}
const QuantizedMatMul *main_as_QuantizedMatMul() const {
return main_type() == OpParameter_QuantizedMatMul ? static_cast<const QuantizedMatMul *>(main()) : nullptr;
}
const QuantizedMaxPool *main_as_QuantizedMaxPool() const {
return main_type() == OpParameter_QuantizedMaxPool ? static_cast<const QuantizedMaxPool *>(main()) : nullptr;
}
const QuantizedRelu *main_as_QuantizedRelu() const {
return main_type() == OpParameter_QuantizedRelu ? static_cast<const QuantizedRelu *>(main()) : nullptr;
}
const QuantizedRelu6 *main_as_QuantizedRelu6() const {
return main_type() == OpParameter_QuantizedRelu6 ? static_cast<const QuantizedRelu6 *>(main()) : nullptr;
}
const QuantizedReshape *main_as_QuantizedReshape() const {
return main_type() == OpParameter_QuantizedReshape ? static_cast<const QuantizedReshape *>(main()) : nullptr;
}
const QuantizedSoftmax *main_as_QuantizedSoftmax() const {
return main_type() == OpParameter_QuantizedSoftmax ? static_cast<const QuantizedSoftmax *>(main()) : nullptr;
}
const QuantizeMaxMin *main_as_QuantizeMaxMin() const {
return main_type() == OpParameter_QuantizeMaxMin ? static_cast<const QuantizeMaxMin *>(main()) : nullptr;
}
const QuantizeV2 *main_as_QuantizeV2() const {
return main_type() == OpParameter_QuantizeV2 ? static_cast<const QuantizeV2 *>(main()) : nullptr;
}
const Range *main_as_Range() const {
return main_type() == OpParameter_Range ? static_cast<const Range *>(main()) : nullptr;
}
const Rank *main_as_Rank() const {
return main_type() == OpParameter_Rank ? static_cast<const Rank *>(main()) : nullptr;
}
const ReduceJoin *main_as_ReduceJoin() const {
return main_type() == OpParameter_ReduceJoin ? static_cast<const ReduceJoin *>(main()) : nullptr;
}
const ReductionParam *main_as_ReductionParam() const {
return main_type() == OpParameter_ReductionParam ? static_cast<const ReductionParam *>(main()) : nullptr;
}
const Relu *main_as_Relu() const {
return main_type() == OpParameter_Relu ? static_cast<const Relu *>(main()) : nullptr;
}
const Relu6 *main_as_Relu6() const {
return main_type() == OpParameter_Relu6 ? static_cast<const Relu6 *>(main()) : nullptr;
}
const RequantizationRange *main_as_RequantizationRange() const {
return main_type() == OpParameter_RequantizationRange ? static_cast<const RequantizationRange *>(main()) : nullptr;
}
const Requantize *main_as_Requantize() const {
return main_type() == OpParameter_Requantize ? static_cast<const Requantize *>(main()) : nullptr;
}
const Reshape *main_as_Reshape() const {
return main_type() == OpParameter_Reshape ? static_cast<const Reshape *>(main()) : nullptr;
}
const Resize *main_as_Resize() const {
return main_type() == OpParameter_Resize ? static_cast<const Resize *>(main()) : nullptr;
}
const RoiParameters *main_as_RoiParameters() const {
return main_type() == OpParameter_RoiParameters ? static_cast<const RoiParameters *>(main()) : nullptr;
}
const Scale *main_as_Scale() const {
return main_type() == OpParameter_Scale ? static_cast<const Scale *>(main()) : nullptr;
}
const Selu *main_as_Selu() const {
return main_type() == OpParameter_Selu ? static_cast<const Selu *>(main()) : nullptr;
}
const Size *main_as_Size() const {
return main_type() == OpParameter_Size ? static_cast<const Size *>(main()) : nullptr;
}
const Slice *main_as_Slice() const {
return main_type() == OpParameter_Slice ? static_cast<const Slice *>(main()) : nullptr;
}
const SliceTf *main_as_SliceTf() const {
return main_type() == OpParameter_SliceTf ? static_cast<const SliceTf *>(main()) : nullptr;
}
const SpaceBatch *main_as_SpaceBatch() const {
return main_type() == OpParameter_SpaceBatch ? static_cast<const SpaceBatch *>(main()) : nullptr;
}
const SqueezeParam *main_as_SqueezeParam() const {
return main_type() == OpParameter_SqueezeParam ? static_cast<const SqueezeParam *>(main()) : nullptr;
}
const StridedSliceParam *main_as_StridedSliceParam() const {
return main_type() == OpParameter_StridedSliceParam ? static_cast<const StridedSliceParam *>(main()) : nullptr;
}
const TensorConvertInfo *main_as_TensorConvertInfo() const {
return main_type() == OpParameter_TensorConvertInfo ? static_cast<const TensorConvertInfo *>(main()) : nullptr;
}
const TfQuantizedConv2D *main_as_TfQuantizedConv2D() const {
return main_type() == OpParameter_TfQuantizedConv2D ? static_cast<const TfQuantizedConv2D *>(main()) : nullptr;
}
const TopKV2 *main_as_TopKV2() const {
return main_type() == OpParameter_TopKV2 ? static_cast<const TopKV2 *>(main()) : nullptr;
}
const Transpose *main_as_Transpose() const {
return main_type() == OpParameter_Transpose ? static_cast<const Transpose *>(main()) : nullptr;
}
const UnaryOp *main_as_UnaryOp() const {
return main_type() == OpParameter_UnaryOp ? static_cast<const UnaryOp *>(main()) : nullptr;
}
const MomentsParam *main_as_MomentsParam() const {
return main_type() == OpParameter_MomentsParam ? static_cast<const MomentsParam *>(main()) : nullptr;
}
const RNNParam *main_as_RNNParam() const {
return main_type() == OpParameter_RNNParam ? static_cast<const RNNParam *>(main()) : nullptr;
}
const BatchMatMulParam *main_as_BatchMatMulParam() const {
return main_type() == OpParameter_BatchMatMulParam ? static_cast<const BatchMatMulParam *>(main()) : nullptr;
}
const QuantizedFloatParam *main_as_QuantizedFloatParam() const {
return main_type() == OpParameter_QuantizedFloatParam ? static_cast<const QuantizedFloatParam *>(main()) : nullptr;
}
const DepthSpaceParam *main_as_DepthSpaceParam() const {
return main_type() == OpParameter_DepthSpaceParam ? static_cast<const DepthSpaceParam *>(main()) : nullptr;
}
const EltwiseInt8 *main_as_EltwiseInt8() const {
return main_type() == OpParameter_EltwiseInt8 ? static_cast<const EltwiseInt8 *>(main()) : nullptr;
}
const ReverseSequenceParam *main_as_ReverseSequenceParam() const {
return main_type() == OpParameter_ReverseSequenceParam ? static_cast<const ReverseSequenceParam *>(main()) : nullptr;
}
const Extra *main_as_Extra() const {
return main_type() == OpParameter_Extra ? static_cast<const Extra *>(main()) : nullptr;
}
const Pool3D *main_as_Pool3D() const {
return main_type() == OpParameter_Pool3D ? static_cast<const Pool3D *>(main()) : nullptr;
}
const Convolution3D *main_as_Convolution3D() const {
return main_type() == OpParameter_Convolution3D ? static_cast<const Convolution3D *>(main()) : nullptr;
}
const ELU *main_as_ELU() const {
return main_type() == OpParameter_ELU ? static_cast<const ELU *>(main()) : nullptr;
}
const DetectionPostProcessParam *main_as_DetectionPostProcessParam() const {
return main_type() == OpParameter_DetectionPostProcessParam ? static_cast<const DetectionPostProcessParam *>(main()) : nullptr;
}
const OneHotParam *main_as_OneHotParam() const {
return main_type() == OpParameter_OneHotParam ? static_cast<const OneHotParam *>(main()) : nullptr;
}
const PadParam *main_as_PadParam() const {
return main_type() == OpParameter_PadParam ? static_cast<const PadParam *>(main()) : nullptr;
}
const WhileParam *main_as_WhileParam() const {
return main_type() == OpParameter_WhileParam ? static_cast<const WhileParam *>(main()) : nullptr;
}
const IfParam *main_as_IfParam() const {
return main_type() == OpParameter_IfParam ? static_cast<const IfParam *>(main()) : nullptr;
}
const RandomUniform *main_as_RandomUniform() const {
return main_type() == OpParameter_RandomUniform ? static_cast<const RandomUniform *>(main()) : nullptr;
}
const LayerNorm *main_as_LayerNorm() const {
return main_type() == OpParameter_LayerNorm ? static_cast<const LayerNorm *>(main()) : nullptr;
}
const TensorArray *main_as_TensorArray() const {
return main_type() == OpParameter_TensorArray ? static_cast<const TensorArray *>(main()) : nullptr;
}
const LSTMBlockCell *main_as_LSTMBlockCell() const {
return main_type() == OpParameter_LSTMBlockCell ? static_cast<const LSTMBlockCell *>(main()) : nullptr;
}
const GridSample *main_as_GridSample() const {
return main_type() == OpParameter_GridSample ? static_cast<const GridSample *>(main()) : nullptr;
}
const LoopParam *main_as_LoopParam() const {
return main_type() == OpParameter_LoopParam ? static_cast<const LoopParam *>(main()) : nullptr;
}
const ImageProcessParam *main_as_ImageProcessParam() const {
return main_type() == OpParameter_ImageProcessParam ? static_cast<const ImageProcessParam *>(main()) : nullptr;
}
const CumSum *main_as_CumSum() const {
return main_type() == OpParameter_CumSum ? static_cast<const CumSum *>(main()) : nullptr;
}
const flatbuffers::String *name() const {
return GetPointer<const flatbuffers::String *>(10);
}
const flatbuffers::Vector<int32_t> *outputIndexes() const {
return GetPointer<const flatbuffers::Vector<int32_t> *>(12);
}
OpType type() const {
return static_cast<OpType>(GetField<int32_t>(14, 0));
}
MNN_DATA_FORMAT defaultDimentionFormat() const {
return static_cast<MNN_DATA_FORMAT>(GetField<int8_t>(16, 1));
}
bool Verify(flatbuffers::Verifier &verifier) const {
return VerifyTableStart(verifier) &&
VerifyOffset(verifier, 4) &&
verifier.VerifyVector(inputIndexes()) &&
VerifyField<uint8_t>(verifier, 6) &&
VerifyOffset(verifier, 8) &&
VerifyOpParameter(verifier, main(), main_type()) &&
VerifyOffset(verifier, 10) &&
verifier.VerifyString(name()) &&
VerifyOffset(verifier, 12) &&
verifier.VerifyVector(outputIndexes()) &&
VerifyField<int32_t>(verifier, 14) &&
VerifyField<int8_t>(verifier, 16) &&
verifier.EndTable();
}
OpT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
void UnPackTo(OpT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
static flatbuffers::Offset<Op> Pack(flatbuffers::FlatBufferBuilder &_fbb, const OpT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
};
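Op stores its parameter as a FlatBuffers union: main_type() is the discriminant and each main_as_Xxx() returns the payload or nullptr. A dispatch sketch using only the generated accessors above (the Convolution2DCommon getters follow the MNN schema):

#include "MNN_generated.h" // FlatBuffers-generated schema header

void describeOp(const MNN::Op* op) {
    switch (op->main_type()) {
        case MNN::OpParameter_Convolution2D: {
            auto conv   = op->main_as_Convolution2D();
            auto common = conv->common(); // kernelX(), strideX(), padX(), ...
            (void)common;
            break;
        }
        case MNN::OpParameter_Blob: {
            auto blob = op->main_as_Blob(); // dims(), dataType(), dataFormat()
            (void)blob;
            break;
        }
        default:
            break;
    }
}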
1.1.2.4 setInputOutputForOps
// source/utils/InitNet.cpp
void setInputOutputForOps(std::vector<std::shared_ptr<Tensor>>& allTensors, const std::vector<const Op*>& ops, bool isStatic) {
std::set<int> inputIndexes;
std::set<int> outputIndexes;
// 0. deal virtual tensor for static model:
// when : A (Any_Op) -----> B (Raster_Op)
// the tensor will be like below:
// A_outputs : a_tensor
// B_inputs : b_tensor (virtual)
// b_tensor.describe.origin = a_tensor_ptr
// b_tensor is not an InputTensor, a_tensor is not an OutputTensor
// so add b_tensor to OutputIndexes, a_tensor to InputIndexes.
if (isStatic) {
std::unordered_map<Tensor*, int> tensorMap;
for (int index = 0; index < allTensors.size(); index++) {
tensorMap.insert(std::make_pair(allTensors[index].get(), index));
}
for (int index = 0; index < allTensors.size(); index++) {
auto des = TensorUtils::getDescribe(allTensors[index].get());
for (int i = 0; i < des->regions.size(); i++) {
outputIndexes.insert(index);
MNN_ASSERT(tensorMap.find(des->regions[i].origin) != tensorMap.end());
int x = tensorMap[des->regions[i].origin];
inputIndexes.insert(x);
}
}
}
// 1. insert all output/input index in outputIndexes/inputIndexes
for (auto op : ops) {
if (nullptr != op->outputIndexes()) {
auto data = op->outputIndexes()->data();
for (int j = 0; j < op->outputIndexes()->size(); ++j) {
outputIndexes.insert(data[j]);
}
}
if (nullptr != op->inputIndexes()) {
auto data = op->inputIndexes()->data();
for (int j = 0; j < op->inputIndexes()->size(); ++j) {
inputIndexes.insert(data[j]);
}
}
MNN_ASSERT(OpType_Input != op->type());
}
// 2. the index in outputIndexes/inputIndexed but not in inputIndexes/outputIndexes is output/input
std::set<int> input;
std::set<int> output;
std::set_difference(outputIndexes.begin(), outputIndexes.end(), inputIndexes.begin(), inputIndexes.end(),
std::inserter(output, output.begin()));
std::set_difference(inputIndexes.begin(), inputIndexes.end(), outputIndexes.begin(), outputIndexes.end(),
std::inserter(input, input.begin()));
// 3. set usage for Tensor by index
for (auto index : input) {
auto des = TensorUtils::getDescribe(allTensors[index].get());
if (des->usage == Tensor::InsideDescribe::CONSTANT || des->usage == Tensor::InsideDescribe::TRAINABLE) {
continue;
}
des->usage = Tensor::InsideDescribe::INPUT;
}
for (auto index : output) {
auto des = TensorUtils::getDescribe(allTensors[index].get());
if (des->usage == Tensor::InsideDescribe::NORMAL) {
des->usage = TensorUsage::OUTPUT;
}
}
}
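Step 2 is two plain set differences: indexes written but never read are graph outputs, indexes read but never written are graph inputs. A toy standalone example:

#include <algorithm>
#include <iterator>
#include <set>

int main() {
    // Toy graph: op A maps tensor 0 -> 1, op B maps tensor 1 -> 2.
    std::set<int> outputIndexes = {1, 2}; // written by some op
    std::set<int> inputIndexes  = {0, 1}; // read by some op
    std::set<int> input, output;
    std::set_difference(outputIndexes.begin(), outputIndexes.end(),
                        inputIndexes.begin(), inputIndexes.end(),
                        std::inserter(output, output.begin())); // output = {2}
    std::set_difference(inputIndexes.begin(), inputIndexes.end(),
                        outputIndexes.begin(), outputIndexes.end(),
                        std::inserter(input, input.begin()));   // input = {0}
    return 0;
}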
1.1.2.5 GeometryComputerUtils::buildConstantTensors
// source/geometry/GeometryComputerUtils.cpp
int GeometryComputerUtils::buildConstantTensors(std::vector<Schedule::OpCacheInfo>& infos) {
// Check Middle Const
// infos.size() = 171
for (auto& info : infos) {
if (info.op->type() == OpType_Const) {
continue;
}
bool isConst = true;
for (int i = 0; i < info.inputs.size(); ++i) {
if (TensorUtils::getDescribe(info.inputs[i])->usage == Tensor::InsideDescribe::CONSTANT) {
continue;
}
// If the op needs this input's content at runtime, the op is not const
if (OpCommonUtils::opNeedContent(info.op, i)) {
isConst = false;
break;
}
}
if (isConst) {
for (auto t : info.outputs) {
TensorUtils::getDescribe(t)->usage = Tensor::InsideDescribe::CONSTANT;
}
info.type = Schedule::CONSTANT;
}
}
// Check force size compute op
int breakIndex = -1;
for (int infoIndex=0; infoIndex < infos.size(); ++infoIndex) {
auto& info = infos[infoIndex];
if (info.op->type() == OpType_Const) {
continue;
}
if (info.op->type() == OpType_Where && info.op->main_type() != OpParameter_Extra) {
// For compatibility with old models
continue;
}
auto dims = SizeComputer::needInputContent(info.op, info.inputs.size());
for (auto index : dims) {
if (index < info.inputs.size()) {
TensorUtils::getDescribe(info.inputs[index])->stageMask |= MNN::Tensor::InsideDescribe::StageInfo::GEOMETRY_STAGE;
if (TensorUtils::getDescribe(info.inputs[index])->usage != Tensor::InsideDescribe::CONSTANT) {
breakIndex = infoIndex;
TensorUtils::getDescribe(info.inputs[index])->usage = Tensor::InsideDescribe::CONSTANT;
}
}
}
}
if (breakIndex >= 0) {
bool hasConst = true;
while (hasConst) {
hasConst = false;
for (auto& info : infos) {
if (info.type == Schedule::CONSTANT) {
continue;
}
bool turnConst = false;
for (auto t : info.outputs) {
if (TensorUtils::getDescribe(t)->usage == Tensor::InsideDescribe::CONSTANT) {
turnConst = true;
break;
}
}
if (turnConst) {
for (auto t : info.outputs) {
TensorUtils::getDescribe(t)->usage = Tensor::InsideDescribe::CONSTANT;
}
for (auto t : info.inputs) {
TensorUtils::getDescribe(t)->usage = Tensor::InsideDescribe::CONSTANT;
}
info.type = Schedule::CONSTANT;
hasConst = true;
}
}
}
}
for (auto& info : infos) {
if (info.type == Schedule::CONSTANT) {
for (auto t : info.inputs) {
TensorUtils::getDescribe(t)->stageMask |= MNN::Tensor::InsideDescribe::StageInfo::GEOMETRY_STAGE;
}
for (auto t : info.outputs) {
TensorUtils::getDescribe(t)->usage = Tensor::InsideDescribe::CONSTANT;
}
}
}
return breakIndex;
}
1.1.2.5.1 OpCommonUtils::opNeedContent
bool OpCommonUtils::opNeedContent(const MNN::Op* op, int index) {
int type = op->type();
switch (type) {
case OpType_ZerosLike:
case OpType_ZeroGrad:
case OpType_Shape:
case OpType_Rank:
case OpType_Const:
case OpType_Size:
case OpType_PriorBox:
return false;
case OpType_Interp:
case OpType_Crop:
case OpType_Reshape:
case OpType_Reduction:
case OpType_Resize:
if (1 == index) {
return false;
}
break;
case OpType_GridSample:
if (2 == index) {
return false;
}
break;
#ifdef MNN_SUPPORT_RENDER
case OpType_RasterAndInterpolate:
{
if (0 == index) {
int type = 4;
if (op->main_type() == OpParameter_Extra) {
auto extra = op->main_as_Extra();
if (nullptr != extra->attr()) {
for (int i=0; i<extra->attr()->size(); ++i) {
auto attr = extra->attr()->GetAs<Attribute>(i);
if (attr->key()->str() == "primitiveType") {
type = attr->i();
break;
}
}
}
}
if (type <= 4) {
return false;
}
}
break;
}
#endif
default:
break;
}
return true;
}
1.1.3 The Tensor class
// project/android/demo/app/includes/MNN/Tensor.hpp
class MNN_PUBLIC Tensor {
public:
struct InsideDescribe;
/** dimension type used to create tensor */
enum DimensionType {
/** for tensorflow net type. uses NHWC as data format. */
TENSORFLOW,
/** for caffe net type. uses NCHW as data format. */
CAFFE,
/** for caffe net type. uses NC4HW4 as data format. */
CAFFE_C4
};
/** handle type */
enum HandleDataType {
/** default handle type */
HANDLE_NONE = 0,
/** string handle type */
HANDLE_STRING = 1
};
/** dimension reorder flag */
enum DataReorderType {
/** default reorder type, do not reorder */
NO_REORDER = 0,
/** reorder dimension 4 by 4. usually used with NC4HW4 or NHWC4 while data type is float. */
REORDER_4 = 1,
/** reorder dimension 8 by 8. usually used with NC4HW4 or NHWC4 while data type is uint8 or int8. */
REORDER_8
};
public:
/**
* @brief create a tensor with dimension size and type without acquire memory for data.
* @param dimSize dimension size.
* @param type dimension type.
*/
Tensor(int dimSize = 4, DimensionType type = CAFFE);
/**
* @brief create a tensor with same shape as given tensor.
* @param tensor shape provider.
* @param type dimension type.
* @param allocMemory acquire memory for data or not.
* @warning tensor data won't be copied.
*/
Tensor(const Tensor* tensor, DimensionType type = CAFFE, bool allocMemory = true);
/** deinitializer */
~Tensor();
private:
// remove all assignment operator
Tensor(const Tensor& tensor) = delete;
Tensor(const Tensor&& tensor) = delete;
Tensor& operator=(const Tensor&) = delete;
Tensor& operator=(const Tensor&&) = delete;
public:
/**
* @brief create tensor with shape, data type and dimension type.
* @param shape tensor shape.
* @param type data type.
* @param dimType dimension type.
* @return created tensor.
* @warning memory for data won't be acquired. call backend's onAcquireBuffer to get memory ready.
*/
static Tensor* createDevice(const std::vector<int>& shape, halide_type_t type, DimensionType dimType = TENSORFLOW);
/**
* @brief create tensor with shape and dimension type. data type is represented by `T`.
* @param shape tensor shape.
* @param dimType dimension type.
* @return created tensor.
* @warning memory for data won't be acquired. call backend's onAcquireBuffer to get memory ready.
*/
template <typename T>
static Tensor* createDevice(const std::vector<int>& shape, DimensionType dimType = TENSORFLOW) {
return createDevice(shape, halide_type_of<T>(), dimType);
}
/**
* @brief create tensor with shape, data type, data and dimension type.
* @param shape tensor shape.
* @param type data type.
* @param data data to save.
* @param dimType dimension type.
* @return created tensor.
*/
static Tensor* create(const std::vector<int>& shape, halide_type_t type, void* data = NULL,
DimensionType dimType = TENSORFLOW);
/**
* @brief create tensor with shape, data and dimension type. data type is represented by `T`.
* @param shape tensor shape.
* @param data data to save.
* @param dimType dimension type.
* @return created tensor.
*/
template <typename T>
static Tensor* create(const std::vector<int>& shape, void* data = NULL, DimensionType dimType = TENSORFLOW) {
return create(shape, halide_type_of<T>(), data, dimType);
}
public:
/**
* @brief for DEVICE tensor, copy data from given host tensor.
* @param hostTensor host tensor, the data provider.
* @return true for DEVICE tensor, and false for HOST tensor.
*/
bool copyFromHostTensor(const Tensor* hostTensor);
/**
* @brief for DEVICE tensor, copy data to given host tensor.
* @param hostTensor host tensor, the data consumer.
* @return true for DEVICE tensor, and false for HOST tensor.
*/
bool copyToHostTensor(Tensor* hostTensor) const;
/**
* @brief create HOST tensor from DEVICE tensor, with or without data copying.
* @param deviceTensor given device tensor.
* @param copyData copy data or not.
* @return created host tensor.
*/
static Tensor* createHostTensorFromDevice(const Tensor* deviceTensor, bool copyData = true);
public:
const halide_buffer_t& buffer() const {
return mBuffer;
}
halide_buffer_t& buffer() {
return mBuffer;
}
/**
* @brief get dimension type.
* @return dimension type.
*/
DimensionType getDimensionType() const;
/**
* @brief handle data type. used when data type code is halide_type_handle.
* @return handle data type.
*/
HandleDataType getHandleDataType() const;
/**
* @brief set data type.
* @param type data type defined in 'Type_generated.h'.
*/
void setType(int type);
/**
* @brief get data type.
* @return data type.
*/
inline halide_type_t getType() const {
return mBuffer.type;
}
/**
* @brief visit host memory, data type is represented by `T`.
* @return data point in `T` type.
*/
template <typename T>
T* host() const {
return (T*)mBuffer.host;
}
/**
* @brief visit device memory.
* @return device data ID. what the ID means varies between backends.
*/
uint64_t deviceId() const {
return mBuffer.device;
}
public:
int dimensions() const {
return mBuffer.dimensions;
}
/**
* @brief get all dimensions' extent.
* @return dimensions' extent.
*/
std::vector<int> shape() const;
/**
* @brief calculate number of bytes needed to store data taking reordering flag into account.
* @return bytes needed to store data
*/
int size() const;
/**
* @brief calculate number of elements needed to store data taking reordering flag into account.
* @return elements needed to store data
*/
inline int elementSize() const {
return size() / mBuffer.type.bytes();
}
public:
inline int width() const {
if (getDimensionType() == TENSORFLOW) {
return mBuffer.dim[2].extent;
}
return mBuffer.dim[3].extent;
}
inline int height() const {
if (getDimensionType() == TENSORFLOW) {
return mBuffer.dim[1].extent;
}
return mBuffer.dim[2].extent;
}
inline int channel() const {
if (getDimensionType() == TENSORFLOW) {
return mBuffer.dim[3].extent;
}
return mBuffer.dim[1].extent;
}
inline int batch() const {
return mBuffer.dim[0].extent;
}
// visit dimension's extent & stride
inline int stride(int index) const {
return mBuffer.dim[index].stride;
}
inline int length(int index) const {
return mBuffer.dim[index].extent;
}
inline void setStride(int index, int stride) {
mBuffer.dim[index].stride = stride;
}
inline void setLength(int index, int length) {
mBuffer.dim[index].extent = length;
}
public:
/**
* @brief print tensor data. for DEBUG use only.
*/
void print() const;
private:
halide_buffer_t mBuffer;
struct InsideDescribe* mDescribe;
private:
friend class TensorUtils;
};
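A short host-side usage sketch of the public Tensor API quoted above (shapes and values are illustrative):

#include <MNN/Tensor.hpp>

int main() {
    // fp32 host tensor, NCHW (CAFFE) layout, shape 1x3x224x224.
    MNN::Tensor* t = MNN::Tensor::create<float>({1, 3, 224, 224}, nullptr,
                                                MNN::Tensor::CAFFE);
    float* data = t->host<float>();
    for (int i = 0; i < t->elementSize(); ++i) {
        data[i] = 0.0f;
    }
    // For a session's DEVICE input, copy via input->copyFromHostTensor(t).
    delete t;
    return 0;
}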
1.1.3.1 Tensor::InsideDescribe
// source/core/TensorUtils.hpp
struct Tensor::InsideDescribe {
struct View {
int32_t offset = 0;
int32_t stride[3] = {1, 1, 1};
};
struct Region {
View src;
View dst;
int32_t size[3] = {1, 1, 1};
Tensor* origin;
};
struct pad {
int32_t left = 0;
int32_t right = 0;
int32_t bottom = 0;
int32_t top = 0;
};
enum MemoryType {
/** The tensor's memory come from Backend */
MEMORY_BACKEND = 0,
/** host memory is owned by tensor or not */
MEMORY_HOST,
/** The tensor doesn't own memory */
MEMORY_VIRTUAL,
/** host memory is owned by tensor or not */
MEMORY_OUTSIDE,
};
enum Usage {
NORMAL,
INPUT,
OUTPUT,
CONSTANT,
/** Whether the tensor is a trainable parameter. Trainable parameter should be stored in a different area. */
TRAINABLE,
};
// For Mask
enum StageInfo {
GEOMETRY_STAGE = 1,
CONVERTED_STAGE = 1 << 4
};
/** extra tensor info container */
struct NativeInsideDescribe : public RefCount {
public:
/** dimension format */
MNN_DATA_FORMAT dimensionFormat = MNN_DATA_FORMAT_NC4HW4;
union {
/** Separate memory offset*/
int offset;
/** function used to free handle */
void (*handleFreeFunction)(void*);
} extra;
MemoryType memoryType = MEMORY_BACKEND;
/** for DEVICE tensor only. */
int useCount = 0;
Usage usage = NORMAL;
std::vector<Region> regions;
halide_dimension_t dims[MNN_MAX_TENSOR_DIM];
// TensorArray Attribute
std::shared_ptr<TensorArrayAttr> tensorArrayAttr;
// Tensor Quant Attribute
std::shared_ptr<QuantAttr> quantAttr;
// Only valid when quantAttr is not nullptr
DataType type = DataType_DT_FLOAT;
AutoRelease<Backend::MemObj> mem;
bool isMutable = true;
int index = -1;
int channel_pack_num = 4;
bool support_pack16 = true;
pad mPads;
// For isMutable = false Tensor , determine whether the content can be convert to main backend
uint32_t stageMask = 0;
inline Backend* getBackend() const {
return backend;
}
inline void setBackend(Backend* bn) {
backend = bn;
}
private:
/** for DEVICE tensor only. backend used to manage tensor's device memory. */
Backend* backend = nullptr;
};
SharedPtr<NativeInsideDescribe> mContent;
};
1.1.3.1.1 NativeInsideDescribe
// source/core/TensorUtils.hpp
/** extra tensor info container */
struct NativeInsideDescribe : public RefCount {
public:
/** dimension format */
MNN_DATA_FORMAT dimensionFormat = MNN_DATA_FORMAT_NC4HW4;
union {
/** Separate memory offset*/
int offset;
/** function used to free handle */
void (*handleFreeFunction)(void*);
} extra;
MemoryType memoryType = MEMORY_BACKEND;
/** for DEVICE tensor only. */
int useCount = 0;
Usage usage = NORMAL;
std::vector<Region> regions;
halide_dimension_t dims[MNN_MAX_TENSOR_DIM];
// TensorArray Attribute
std::shared_ptr<TensorArrayAttr> tensorArrayAttr;
// Tensor Quant Attribute
std::shared_ptr<QuantAttr> quantAttr;
// Only valid when quantAttr is not nullptr
DataType type = DataType_DT_FLOAT;
AutoRelease<Backend::MemObj> mem;
bool isMutable = true;
int index = -1;
int channel_pack_num = 4;
bool support_pack16 = true;
pad mPads;
// For isMutable = false Tensor , determine whether the content can be convert to main backend
uint32_t stageMask = 0;
inline Backend* getBackend() const {
return backend;
}
inline void setBackend(Backend* bn) {
backend = bn;
}
private:
/** for DEVICE tensor only. backend used to manage tensor's device memory. */
Backend* backend = nullptr;
};
1.1.3.1.1.1 RefCount
// source/core/AutoStorage.h
class RefCount
{
public:
void addRef() const
{
mNum++;
}
void decRef() const
{
--mNum;
MNN_ASSERT(mNum>=0);
if (0 >= mNum)
{
delete this;
}
}
inline int count() const{return mNum;}
protected:
RefCount():mNum(1){}
RefCount(const RefCount& f):mNum(f.mNum){}
void operator=(const RefCount& f)
{
if (this != &f)
{
mNum = f.mNum;
}
}
virtual ~RefCount(){}
private:
mutable int mNum;
};
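RefCount starts at 1 in its constructor and deletes itself when decRef drives the count to 0; MNN pairs it with the intrusive SharedPtr from the same header (as in SharedPtr<NativeInsideDescribe> mContent above). A minimal usage sketch (MyDescribe is illustrative):

class MyDescribe : public RefCount {
public:
    int payload = 0;
};

void demo() {
    MyDescribe* d = new MyDescribe; // count == 1 from RefCount()
    d->addRef();                    // count == 2: a second owner
    d->decRef();                    // count == 1
    d->decRef();                    // count == 0 -> delete this
}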