基于Hive的天气情况大数据分析系统(通过hive进行大数据分析将分析的数据通过sqoop导入到mysql,通过Django基于mysql的数据做可视化)
-
Hive介绍:
Hive是建立在Hadoop之上的数据仓库基础架构,它提供了类似于SQL的语言(HQL),可以对大规模数据集进行查询和分析。通过Hive,我们可以在分布式存储系统中进行复杂的数据处理和分析。 -
Sqoop简介:
Sqoop是一个用于在Apache Hadoop和关系型数据库之间传输数据的工具。我们可以使用Sqoop将Hive中的分析结果导出到关系型数据库中,如MySQL,以便进一步处理和可视化。 -
Django概述:
Django是一个高级的Python Web框架,它提供了一系列工具和库,用于快速构建Web应用程序。我们可以利用Django连接到MySQL数据库,处理数据,并将其呈现为可视化界面。
Hive大数据分析sql,基于数据创建hive表,然后进行数据分析
-- 创建数据库
CREATE DATABASE IF NOT EXISTS big_data;
-- 切换到big_data数据库
USE big_data;
load data local inpath '/export/server/28' INTO TABLE weather_data;
-- 创建weather_data表
CREATE TABLE IF NOT EXISTS weather_data (
`date` STRING,
high_temperature STRING,
low_temperature STRING,
weather STRING,
wind_direction STRING,
city STRING
)ROW FORMAT DELIMITED
FIELDS TERMINATED BY ',';
-- 插入数据到weather_data表(示例数据)
INSERT INTO TABLE weather_data VALUES
(1, '2022-01-01 周六', '6°', '-7°', '晴', '西北风3级', '北京'),
(2, '2022-01-02 周日', '2°', '-7°', '多云', '南风2级', '北京');
-- 创建etl_weather_data表
CREATE TABLE IF NOT EXISTS etl_weather_data (
`date` STRING,
day_of_week STRING,
high_temperature INT,
low_temperature INT,
weather STRING,
wind_direction STRING,
wind_speed STRING,
city STRING
);
-- 插入数据到etl_weather_data表
INSERT INTO TABLE etl_weather_data
SELECT
SUBSTR(`date`, 1, INSTR(`date`, ' ') - 1) AS `date`,
SUBSTR(`date`, INSTR(`date`, ' ') + 1) AS day_of_week,
CAST(SUBSTR(high_temperature, 1, INSTR(high_temperature, '°') - 1) AS INT) AS high_temperature,
CAST(SUBSTR(low_temperature, 1, INSTR(low_temperature, '°') - 1) AS INT) AS low_temperature,
weather,
REGEXP_REPLACE(SUBSTR(wind_direction, 1, INSTR(wind_direction, '级') - 1), '[0-9]', '') AS wind_direction,
SUBSTR(SUBSTR(wind_direction, INSTR(wind_direction, '风') + 1),1,1) AS wind_speed,
city
FROM
weather_data;
-- 1.统计一年中每个城市晴天个数的top10
CREATE TABLE IF NOT EXISTS top_sunny_cities (
city STRING,
sunny_days_count INT
);
INSERT INTO TABLE top_sunny_cities
SELECT
city,
COUNT(*) AS sunny_days_count
FROM
etl_weather_data
WHERE
weather LIKE '%晴%'
GROUP BY
city
ORDER BY
sunny_days_count DESC
LIMIT 10;
-- 2.统计北京一年中每个月的温差变化
CREATE TABLE IF NOT EXISTS monthly_max_temperature_difference (
month_year STRING,
max_temperature_difference INT
);
INSERT INTO TABLE monthly_max_temperature_difference
SELECT
CONCAT(YEAR(`date`), '-', LPAD(MONTH(`date`), 2, '0')) AS month_year,
MAX(high_temperature - low_temperature) AS max_temperature_difference
FROM
etl_weather_data
WHERE
city = '北京'
GROUP BY
YEAR(`date`), MONTH(`date`);
-- 3.统计城市出现3级以上风速最多的10个城市
CREATE TABLE IF NOT EXISTS top_cities_high_wind (
city STRING,
high_wind_days_count INT
);
INSERT INTO TABLE top_cities_high_wind
SELECT
city,
COUNT(*) AS high_wind_days_count
FROM
etl_weather_data
WHERE
CAST(wind_speed AS INT) >= 3
GROUP BY
city
ORDER BY
high_wind_days_count DESC
LIMIT 10;
基于sqoop将数据导入到mysql中
sqoop export \
--connect jdbc:mysql://192.168.138.1:3306/big_data \
--username root --password '123456' \
--table top_sunny_cities_sqoop \
--export-dir /hive/warehouse/big_data.db/big_data.dbbig_data.db/top_sunny_cities \
--input-fields-terminated-by '\001' \
--input-lines-terminated-by '\n';
sqoop export \
--connect jdbc:mysql:// 192.168.138.1:3306/big_data \
--username root --password 123456 \
--table monthly_max_temperature_difference \
--export-dir /user/hive/warehouse/big_data.db/big_data.dbmonthly_max_temperature_difference \
--input-fields-terminated-by '\001' \
--input-lines-terminated-by '\n'
sqoop export \
--connect jdbc:mysql:// 192.168.138.1:3306/big_data \
--username root --password 123456 \
--table top_cities_high_wind \
--export-dir /user/hive/warehouse/big_data.db/big_data.dbtop_cities_high_wind \
--input-fields-terminated-by '\001' \
--input-lines-terminated-by '\n'
基于mysql数据使用Django做数据可视化
from django.contrib.auth.decorators import login_required
from django.http import HttpResponse
from django.shortcuts import render, redirect
from django.shortcuts import render
from pyecharts import options as opts
from pyecharts.charts import Line, Bar, Pie, Grid
# Create your views here.
from django.shortcuts import render
from pyecharts.globals import ThemeType
from api.service.task_service import get_user, top_sunny_cities, monthly_max_temperature_difference, \
top_cities_high_wind, top_rainy_cities, monthly_rainy_days, yearly_min_temperatures, daily_wind_speed, \
daily_temperature_difference, register_user
def login_page(request):
if request.method == 'POST':
username = request.POST.get('username')
password = request.POST.get('password')
user = get_user(username,password)
if user is not None:
return redirect('home')
else:
return render(request, 'login.html', {'error_message': 'Invalid login credentials.'})
return render(request, 'login.html')
def register_view(request):
# 处理注册逻辑
if request.method == 'GET':
username = request.GET.get('username')
password = request.GET.get('password')
if username and password:
register_user(username,password)
return HttpResponse("注册成功!")
return render(request, 'register.html') # 使用你的注册模板路径
def home(request):
print(2)
return render(request, 'home.html')
def data_analysis(request, button_id):
return render(request, 'data_analysis.html', {'button_id': button_id})
def data_analysis(request, button_id):
# 根据按钮 ID 进行不同的处理
if button_id == 1:
x,y = top_sunny_cities()
line_chart = (
Line()
.add_xaxis(xaxis_data=x)
.add_yaxis(series_name="晴天个数", y_axis=y)
.set_global_opts(title_opts=opts.TitleOpts(title="一年中每个城市晴天个数的top10"))
)
chart_html = line_chart.render_embed()
button_name = "折线图"
elif button_id == 2:
x,y = monthly_max_temperature_difference()
line_chart = (
Line()
.add_xaxis(xaxis_data=x)
.add_yaxis(series_name="温差值", y_axis=y)
.set_global_opts(title_opts=opts.TitleOpts(title="北京一年中每个月的温差变化"))
)
chart_html = line_chart.render_embed()
button_name = "折线图"
elif button_id == 3:
x,y = top_cities_high_wind()
bar_chart = (
Bar()
.add_xaxis(xaxis_data=x)
.add_yaxis(series_name="3级风速次数",y_axis=y)
.set_global_opts(title_opts=opts.TitleOpts(title="出现3级以上风速的top10个城市"))
)
chart_html = bar_chart.render_embed()
button_name = "条形图"
elif button_id == 4:
x, y = top_rainy_cities()
bar_chart = (
Bar()
.add_xaxis(xaxis_data=x)
.add_yaxis(series_name="雨天数量", y_axis=y)
.set_global_opts(title_opts=opts.TitleOpts(title="多雨城市的top10"))
)
chart_html = bar_chart.render_embed()
button_name = "条形图"
elif button_id == 5:
x, y = monthly_rainy_days()
pie = Pie()
pie.add("", list(zip(x, y)))
pie.set_global_opts(title_opts={"text": "杭州每月雨天变化", "subtext": "2022年"},
legend_opts=opts.LegendOpts(orient="vertical", pos_right="right", pos_top="center"))
chart_html = pie.render_embed()
button_name = "饼图"
elif button_id == 6:
x, y = yearly_min_temperatures()
line_chart = (
Line()
.add_xaxis(xaxis_data=x)
.add_yaxis(series_name="温度", y_axis=y)
.set_global_opts(title_opts=opts.TitleOpts(title="城市一年中最低的温度top10"))
)
chart_html = line_chart.render_embed()
button_name = "折线图"
elif button_id == 7:
x,y=daily_temperature_difference()
# 创建饼图
pie = (
Pie(init_opts=opts.InitOpts(width="800px", height="600px"))
.add(
series_name="南京10月份1~10号温差变化",
data_pair=list(zip(x, y)),
radius=["40%", "75%"], # 设置内外半径,实现空心效果
label_opts=opts.LabelOpts(is_show=True, position="inside"),
)
.set_global_opts(title_opts=opts.TitleOpts(title="南京10月份1~10号温差变化"),
legend_opts=opts.LegendOpts(orient="vertical", pos_right="right", pos_top="center"),
)
.set_series_opts( # 设置系列选项,调整 is_show 阈值
label_opts=opts.LabelOpts(is_show=True)
)
)
chart_html = pie.render_embed()
button_name = "饼图"
elif button_id == 8:
x,y=daily_wind_speed()
bar_chart = (
Bar()
.add_xaxis(xaxis_data=x)
.add_yaxis(series_name="风速级别", y_axis=y)
.set_global_opts(title_opts=opts.TitleOpts(title="南京10月份每天的风速变化"))
)
chart_html = bar_chart.render_embed()
button_name = "条形图"
return render(request, 'data_analysis.html', {'chart_html': chart_html, 'button_name': button_name})
展示Django项目运行结果
如有遇到问题可以找小编沟通交流哦。另外小编帮忙辅导大课作业,学生毕设等。不限于python,java,大数据等。