Day 61-62: Decision Tree (ID3)
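
ID3 grows the tree greedily: at each node it selects, among the still-available attributes, the one that minimizes the conditional entropy of the class label (equivalently, maximizes the information gain), splits the available instances by that attribute's values, and recurses until a block is pure or too small to split. The conditionalEntropy method in the code below computes, for a candidate attribute A over the current block D,

H(D \mid A) = \sum_{v} \frac{|D_v|}{|D|} \left( -\sum_{c} p(c \mid D_v) \log p(c \mid D_v) \right)

where D_v is the subset of D taking value v on A, and p(c \mid D_v) is the fraction of D_v with class label c. The code uses the natural logarithm; the base only rescales the values and does not change which attribute is selected.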

Code:

package dl;

import java.io.FileReader;
import java.util.Arrays;
import weka.core.*;

/**
 * The ID3 decision tree induction algorithm.
 */
public class ID3 {
    /**
     * The data.
     */
    Instances dataset;

    /**
     * Is the block of this node pure (only one class label)?
     */
    boolean pure;

    /**
     * The number of classes. For binary classification it is 2.
     */
    int numClasses;

    /**
     * Available instances. Other instances do not belong to this branch.
     */
    int[] availableInstances;

    /**
     * Available attributes. Other attributes have been selected in the path
     * from the root.
     */
    int[] availableAttributes;

    /**
     * The selected attribute.
     */
    int splitAttribute;

    /**
     * The child nodes.
     */
    ID3[] children;

    /**
     * My label. Internal nodes also have a label. For example, the combination
     * <outlook = sunny, humidity = high> may never appear in the training data,
     * so the label of an internal node is used to classify such an instance.
     */
    int label;

    /**
     * The predictions, including queried and predicted labels (not used in this version).
     */
    int[] predicts;

    /**
     * A block with no more instances than this threshold cannot be split further.
     */
    static int smallBlockThreshold = 3;

    /**
     ********************
     * The constructor.
     *
     * @param paraFilename
     *            The given file.
     ********************
     */
    public ID3(String paraFilename) {
        dataset = null;
        try {
            FileReader fileReader = new FileReader(paraFilename);
            dataset = new Instances(fileReader);
            fileReader.close();
        } catch (Exception ee) {
            System.out.println("Cannot read the file: " + paraFilename + "\r\n" + ee);
            System.exit(0);
        } // Of try

        dataset.setClassIndex(dataset.numAttributes() - 1);
        numClasses = dataset.classAttribute().numValues();

        availableInstances = new int[dataset.numInstances()];
        for (int i = 0; i < availableInstances.length; i++) {
            availableInstances[i] = i;
        } // Of for i
        availableAttributes = new int[dataset.numAttributes() - 1];
        for (int i = 0; i < availableAttributes.length; i++) {
            availableAttributes[i] = i;
        } // Of for i

        // Initialize.
        children = null;
        // Determine the label by simple voting.
        label = getMajorityClass(availableInstances);
        // Determine whether or not it is pure.
        pure = pureJudge(availableInstances);
    }// Of the first constructor

    /**
     ********************
     * The constructor.
     *
     * @param paraDataset
     *            The given dataset.
     * @param paraAvailableInstances
     *            The available instances of this node.
     * @param paraAvailableAttributes
     *            The available attributes of this node.
     ********************
     */
    public ID3(Instances paraDataset, int[] paraAvailableInstances, int[] paraAvailableAttributes) {
        // Copy references instead of cloning the dataset and the index arrays.
        dataset = paraDataset;
        availableInstances = paraAvailableInstances;
        availableAttributes = paraAvailableAttributes;

        // Initialize.
        children = null;
        // Determine the label by simple voting.
        label = getMajorityClass(availableInstances);
        // Determine whether or not it is pure.
        pure = pureJudge(availableInstances);
    }// Of the second constructor

    /**
     **********************************
     * Is the given block pure?
     *
     * @param paraBlock
     *            The block.
     * @return True if pure.
     **********************************
     */
    public boolean pureJudge(int[] paraBlock) {
        pure = true;

        for (int i = 1; i < paraBlock.length; i++) {
            if (dataset.instance(paraBlock[i]).classValue() != dataset.instance(paraBlock[0])
                    .classValue()) {
                pure = false;
                break;
            } // Of if
        } // Of for i

        return pure;
    }// Of pureJudge

    /**
     **********************************
     * Compute the majority class of the given block for voting.
     *
     * @param paraBlock
     *            The block.
     * @return The majority class.
     **********************************
     */
    public int getMajorityClass(int[] paraBlock) {
        int[] tempClassCounts = new int[dataset.numClasses()];
        for (int i = 0; i < paraBlock.length; i++) {
            tempClassCounts[(int) dataset.instance(paraBlock[i]).classValue()]++;
        } // Of for i

        int resultMajorityClass = -1;
        int tempMaxCount = -1;
        for (int i = 0; i < tempClassCounts.length; i++) {
            if (tempMaxCount < tempClassCounts[i]) {
                resultMajorityClass = i;
                tempMaxCount = tempClassCounts[i];
            } // Of if
        } // Of for i

        return resultMajorityClass;
    }// Of getMajorityClass

    /**
     **********************************
     * Select the best attribute.
     *
     * @return The best attribute index.
     **********************************
     */
    public int selectBestAttribute() {
        splitAttribute = -1;
        double tempMinimalEntropy = Double.MAX_VALUE;
        double tempEntropy;
        for (int i = 0; i < availableAttributes.length; i++) {
            tempEntropy = conditionalEntropy(availableAttributes[i]);
            if (tempMinimalEntropy > tempEntropy) {
                tempMinimalEntropy = tempEntropy;
                splitAttribute = availableAttributes[i];
            } // Of if
        } // Of for i
        return splitAttribute;
    }// Of selectBestAttribute

    /**
     **********************************
     * Compute the conditional entropy of an attribute.
     *
     * @param paraAttribute
     *            The given attribute.
     *
     * @return The conditional entropy of the class given the attribute.
     **********************************
     */
    public double conditionalEntropy(int paraAttribute) {
        // Step 1. Count the co-occurrences of attribute values and class labels.
        int tempNumClasses = dataset.numClasses();
        int tempNumValues = dataset.attribute(paraAttribute).numValues();
        int tempNumInstances = availableInstances.length;
        double[] tempValueCounts = new double[tempNumValues];
        double[][] tempCountMatrix = new double[tempNumValues][tempNumClasses];

        int tempClass, tempValue;
        for (int i = 0; i < tempNumInstances; i++) {
            tempClass = (int) dataset.instance(availableInstances[i]).classValue();
            tempValue = (int) dataset.instance(availableInstances[i]).value(paraAttribute);
            tempValueCounts[tempValue]++;
            tempCountMatrix[tempValue][tempClass]++;
        } // Of for i

        // Step 2. Compute the entropy of each block, weighted by its relative size.
        double resultEntropy = 0;
        double tempEntropy, tempFraction;
        for (int i = 0; i < tempNumValues; i++) {
            if (tempValueCounts[i] == 0) {
                continue;
            } // Of if
            tempEntropy = 0;
            for (int j = 0; j < tempNumClasses; j++) {
                tempFraction = tempCountMatrix[i][j] / tempValueCounts[i];
                if (tempFraction == 0) {
                    continue;
                } // Of if
                // Natural logarithm; the base does not change which attribute minimizes the entropy.
                tempEntropy += -tempFraction * Math.log(tempFraction);
            } // Of for j
            resultEntropy += tempValueCounts[i] / tempNumInstances * tempEntropy;
        } // Of for i

        return resultEntropy;
    }// Of conditionalEntropy

    /**
     **********************************
     * Split the data according to the given attribute.
     *
     * @param paraAttribute
     *            The given attribute.
     * @return The blocks, one for each value of the attribute.
     **********************************
     */
    public int[][] splitData(int paraAttribute) {
        int tempNumValues = dataset.attribute(paraAttribute).numValues();
        // System.out.println("Dataset " + dataset + "\r\n");
        // System.out.println("Attribute " + paraAttribute + " has " +
        // tempNumValues + " values.\r\n");
        int[][] resultBlocks = new int[tempNumValues][];
        int[] tempSizes = new int[tempNumValues];

        // First scan to count the size of each block.
        int tempValue;
        for (int i = 0; i < availableInstances.length; i++) {
            tempValue = (int) dataset.instance(availableInstances[i]).value(paraAttribute);
            tempSizes[tempValue]++;
        } // Of for i

        // Allocate space.
        for (int i = 0; i < tempNumValues; i++) {
            resultBlocks[i] = new int[tempSizes[i]];
        } // Of for i

        // Second scan to fill.
        Arrays.fill(tempSizes, 0);
        for (int i = 0; i < availableInstances.length; i++) {
            tempValue = (int) dataset.instance(availableInstances[i]).value(paraAttribute);
            // Copy data.
            resultBlocks[tempValue][tempSizes[tempValue]] = availableInstances[i];
            tempSizes[tempValue]++;
        } // Of for i

        return resultBlocks;
    }// Of splitData

    /**
     **********************************
     * Build the tree recursively.
     **********************************
     */
    public void buildTree() {
        // A pure block becomes a leaf.
        if (pureJudge(availableInstances)) {
            return;
        } // Of if
        // Pre-pruning: a small block also becomes a leaf.
        if (availableInstances.length <= smallBlockThreshold) {
            return;
        } // Of if

        selectBestAttribute();
        int[][] tempSubBlocks = splitData(splitAttribute);
        children = new ID3[tempSubBlocks.length];

        // Construct the remaining attribute set.
        int[] tempRemainingAttributes = new int[availableAttributes.length - 1];
        for (int i = 0; i < availableAttributes.length; i++) {
            if (availableAttributes[i] < splitAttribute) {
                tempRemainingAttributes[i] = availableAttributes[i];
            } else if (availableAttributes[i] > splitAttribute) {
                tempRemainingAttributes[i - 1] = availableAttributes[i];
            } // Of if
        } // Of for i

        // Construct children.
        for (int i = 0; i < children.length; i++) {
            if ((tempSubBlocks[i] == null) || (tempSubBlocks[i].length == 0)) {
                children[i] = null;
                continue;
            } else {
                // System.out.println("Building children #" + i + " with
                // instances " + Arrays.toString(tempSubBlocks[i]));
                children[i] = new ID3(dataset, tempSubBlocks[i], tempRemainingAttributes);

                // Important code: do this recursively
                children[i].buildTree();
            } // Of if
        } // Of for i
    }// Of buildTree

    /**
     **********************************
     * Classify an instance.
     *
     * @param paraInstance
     *            The given instance.
     * @return The prediction.
     **********************************
     */
    public int classify(Instance paraInstance) {
        if (children == null) {
            return label;
        } // Of if

        ID3 tempChild = children[(int) paraInstance.value(splitAttribute)];
        if (tempChild == null) {
            return label;
        } // Of if

        return tempChild.classify(paraInstance);
    }// Of classify

    /**
     **********************************
     * Test on a testing set.
     *
     * @param paraDataset
     *            The given testing data.
     * @return The accuracy.
     **********************************
     */
    public double test(Instances paraDataset) {
        double tempCorrect = 0;
        for (int i = 0; i < paraDataset.numInstances(); i++) {
            if (classify(paraDataset.instance(i)) == (int) paraDataset.instance(i).classValue()) {
                tempCorrect++;
            } // Of if
        } // Of for i

        return tempCorrect / paraDataset.numInstances();
    }// Of test

    /**
     **********************************
     * Test on the training set.
     *
     * @return The accuracy.
     **********************************
     */
    public double selfTest() {
        return test(dataset);
    }// Of selfTest

    /**
     *******************
     * Override the method declared in Object.
     *
     * @return The tree structure.
     *******************
     */
    @Override
    public String toString() {
        String resultString = "";
        if (children == null) {
            resultString += "class = " + label;
        } else {
            String tempAttributeName = dataset.attribute(splitAttribute).name();
            for (int i = 0; i < children.length; i++) {
                if (children[i] == null) {
                    resultString += tempAttributeName + " = "
                            + dataset.attribute(splitAttribute).value(i) + ":" + "class = " + label
                            + "\r\n";
                } else {
                    resultString += tempAttributeName + " = "
                            + dataset.attribute(splitAttribute).value(i) + ":" + children[i]
                            + "\r\n";
                } // Of if
            } // Of for i
        } // Of if

        return resultString;
    }// Of toString

    /**
     *************************
     * Test this class with the weather data.
     *************************
     */
    public static void id3Test() {
        ID3 tempID3 = new ID3("C:\\Users\\86183\\IdeaProjects\\deepLearning\\src\\main\\java\\resources\\weather.arff");
        ID3.smallBlockThreshold = 3;
        tempID3.buildTree();

        System.out.println("The tree is: \r\n" + tempID3);

        double tempAccuracy = tempID3.selfTest();
        System.out.println("The accuracy is: " + tempAccuracy);
    }// Of id3Test

    /**
     *************************
     * Test this class.
     *
     * @param args
     *            Not used now.
     *************************
     */
    public static void main(String[] args) {
        id3Test();
    }// Of main
}// Of class ID3
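
The selfTest method above only measures accuracy on the training data. Below is a minimal sketch of how the test method could be used with a separate test set. The helper method and both file names (weather.arff, weather-test.arff) are placeholders for illustration, not part of the original class; any ARFF file with nominal attributes and the class as the last attribute would work.

    // Hypothetical helper that could be added to the ID3 class:
    // train on one ARFF file and evaluate on another.
    public static void id3TrainTestDemo() throws Exception {
        ID3 tempTree = new ID3("weather.arff"); // assumed training file
        tempTree.buildTree();

        FileReader tempReader = new FileReader("weather-test.arff"); // assumed test file
        Instances tempTestSet = new Instances(tempReader);
        tempReader.close();
        tempTestSet.setClassIndex(tempTestSet.numAttributes() - 1);

        System.out.println("Test accuracy: " + tempTree.test(tempTestSet));
    }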

Results:
