项目视频讲解:
《基于机器学习的居民消费影响因子分析预测》(哔哩哔哩 bilibili 视频)
主要工作内容:
完整代码:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import missingno as msno
import warnings
# Suppress all warnings so they do not clutter the output.
warnings.filterwarnings(action='ignore')
# Configure matplotlib in a single call:
#   - use the SimHei font so Chinese text is displayed in figures;
#   - make the minus sign display correctly on the axes.
plt.rcParams.update({
    'font.sans-serif': 'SimHei',
    'axes.unicode_minus': False,
})
# Load the dataset (path is relative to the current working directory).
data = pd.read_csv('pay_gap_Europe.csv')
# Preview 10 randomly chosen rows (shown as cell output in a notebook).
data.sample(10)
# Count missing values per column, keep only the columns that actually have
# some, and sort them from most to fewest missing.
missing_values = data.isnull().sum()
missing_values = missing_values[missing_values > 0].sort_values(ascending=False)
# Share of rows that are missing in each of those columns, in percent.
missing_percentage = (missing_values / len(data)) * 100
# Combine counts and percentages into one summary table. The original code
# referenced `missing_data` without ever defining it, which raised NameError.
missing_data = pd.DataFrame({
    'Missing Values': missing_values,
    'Percentage': missing_percentage,
})
# Bare expression: rendered as the cell output when run in a notebook.
missing_data