🦄 个人主页——🎐开着拖拉机回家_Linux,大数据运维-CSDN博客 🎐✨🍁
🪁🍁🪁🍁🪁🍁🪁🍁 🪁🍁🪁🍁🪁🍁🪁 🪁🍁🪁🍁🪁🍁🪁🍁🪁🍁🪁🍁
感谢点赞和关注 ,每天进步一点点!加油!
目录
一、概述
二、集群版本信息
三、组件状态信息获取
四、告警实现
一、概述
Ambari 借鉴了很多成熟分布式软件的 API 设计,Rest API 就是一个很好的体现。通过 Ambari 的 Rest API,可以在脚本中使用 curl 维护整个集群;并且,我们可以用 Rest API 实现一些无法在 Ambari GUI 上完成的操作。
二、集群版本信息
三、组件状态信息获取
参考:【Ambari】Python调用Rest API 获取集群状态信息并发送钉钉告警-CSDN博客
RESOURCEMANAGER停止
curl -u admin:admin -i -H 'X-Requested-By:ambari' -X PUT -d '{"RequestInfo":{"context":"Restart RESOURCEMANAGER via REST"},"Body" : {"HostRoles" : {"state":"INSTALLED"}}}' http://192.168.2.153:8080/api/v1/clusters/winner/hosts/hdp105/host_components/RESOURCEMANAGER
- -u Ambari登录用户:密码
- -i 在输出中包含 HTTP 响应头信息;-H 为请求添加自定义请求头(此处为 X-Requested-By:ambari)
- -X 指定请求使用的 HTTP 方法(如 GET、POST、PUT),此处使用 PUT 修改组件的期望状态
ambari页面显示停止成功调用
ambari页面RM服务停止
RESOURCEMANAGER启动
curl -u admin:admin -i -H 'X-Requested-By:ambari' -X PUT -d '{"RequestInfo":{"context":"Restart RESOURCEMANAGER via REST"},"Body" : {"HostRoles" : {"state":"STARTED"}}}' http://192.168.2.153:8080/api/v1/clusters/winner/hosts/hdp105/host_components/RESOURCEMANAGER
直接使用RESTART
curl -uadmin:admin -H 'X-Requested-By: ambari' -X POST -d '{"RequestInfo":{"command":"RESTART","context":"Restart RESOURCEMANAGER via REST","operation_level":{"level":"HOST","cluster_name":"winner"}},"Requests/resource_filters":[{"service_name":"YARN","component_name":"RESOURCEMANAGER","hosts":"hdp103"}]}' http://192.168.2.153:8080/api/v1/clusters/winner/requests
请求响应接受
四、告警实现
python 脚本实现RM HA的监控告警
# -*- coding: utf-8 -*-
import logging
import time
from imp import reload
import requests
import json
import sys
"""
~~~~~~~~~~~~
author: kangll
date: 2023/12/6 13:29
desc:
-- curl 请求,如下为测试链接
curl -u admin:admin -i -H X-Requested-By:ambari -XGET http://192.168.2.153:8080/api/v1/clusters/winner/hosts/winner
http://192.168.2.153:8080/api/v1/clusters/winner/hosts/hdp105/host_components/RESOURCEMANAGER
-- HDFS 服务启动
curl -u admin:admin -i -H 'X-Requested-By:ambari' -X PUT -d '{"RequestInfo":{"context":"Start RESOURCEMANAGER via REST"}
,"Body" : {"ServiceInfo" : {"state":"STARTED"}}}' http://192.168.2.153:8080/api/v1/clusters/winner/services/HDFS
"""
# Python 2 only: re-expose and call sys.setdefaultencoding so that implicit
# str/unicode conversions use UTF-8. The function does not exist on
# Python 3, so the hack is skipped there instead of crashing at import time.
if sys.version_info[0] == 2:
    reload(sys)
    sys.setdefaultencoding('utf8')

__author__ = 'kanglilong <kangll@winnerinf.com>'

logger = logging.getLogger('mylogger')
logger.setLevel(level=logging.INFO)

# Base URL of the Ambari REST API "hosts" collection for the cluster.
control_url = "http://192.168.2.153:8080/api/v1/clusters/winner/hosts"
# Ambari web login credentials (user, password).
AUTH = ("admin", "admin")
headers = {'Content-Type': 'application/json;charset=utf-8'}
# DingTalk robot webhook URL.
# NOTE(review): the access token is a hard-coded secret; consider loading it
# from the environment or a config file instead.
api_url = "https://oapi.dingtalk.com/robot/send?access_token=f4e0f344306ce9b6eec60bec95d5aa7c57f4264a791458dc09121dd7e948ac64"
# Hostnames of the two ResourceManager nodes.
rm_hostname_01 = "hdp103"
rm_hostname_02 = "hdp105"
# Ambari REST endpoint that accepts cluster requests (used for RESTART).
requests_rm_url = "http://192.168.2.153:8080/api/v1/clusters/winner/requests"
def _fetch_rm_host_role(hostname, timeout=30):
    """Return the ``HostRoles`` dict of the RESOURCEMANAGER on *hostname*.

    Returns ``None`` (after printing a diagnostic) when the request fails,
    the HTTP status is not 200, or the response body has no ``HostRoles``.
    """
    url = control_url + "/{}/host_components/RESOURCEMANAGER".format(hostname)
    try:
        rep = requests.get(url, auth=AUTH, timeout=timeout)
    except requests.RequestException as exc:
        print("没有正常获取到状态")
        print(exc)
        return None

    print(rep.status_code)
    if rep.status_code != 200:
        print("没有正常获取到状态")
        return None

    try:
        return json.loads(rep.text)['HostRoles']
    except (ValueError, KeyError, TypeError):
        print("没有正常获取到状态")
        return None


def _restart_rm(hostname, timeout=30):
    """POST a RESTART request for the RESOURCEMANAGER on *hostname*."""
    payload = {
        "RequestInfo": {
            "command": "RESTART",
            "context": "Restart RESOURCEMANAGER via REST",
            "operation_level": {
                "level": "HOST",
                "cluster_name": "winner"
            }
        },
        "Requests/resource_filters": [{
            "service_name": "YARN",
            "component_name": "RESOURCEMANAGER",
            "hosts": hostname
        }]
    }
    # Send X-Requested-By so the write request is accepted by Ambari's CSRF
    # protection, as in the equivalent curl commands.
    post_headers = dict(headers)
    post_headers['X-Requested-By'] = 'ambari'
    try:
        restart_res = requests.post(requests_rm_url, data=json.dumps(payload),
                                    auth=AUTH, headers=post_headers,
                                    timeout=timeout)
    except requests.RequestException as exc:
        print("重启请求发送失败!")
        print(exc)
        return

    print(restart_res.text)
    if restart_res.status_code == 202:
        print("重启请求发送成功!")
    else:
        print("重启请求发送失败!")


def getHostComponentsStatus():
    """Check both ResourceManagers and restart one if both are STANDBY.

    Queries the RESOURCEMANAGER host component on ``rm_hostname_01`` and
    ``rm_hostname_02``. When both report state ``STARTED`` and HA state
    ``STANDBY``, a DingTalk alert is sent through ``send_msg`` and a RESTART
    request is posted for the ResourceManager on ``rm_hostname_01``.

    If either status cannot be fetched, the check stops without alerting or
    restarting.

    :return: None
    """
    formatted_time = time.strftime('%Y-%m-%d %H:%M:%S')

    print("-----------------")
    role_01 = _fetch_rm_host_role(rm_hostname_01)
    role_02 = _fetch_rm_host_role(rm_hostname_02)
    if role_01 is None or role_02 is None:
        # Without both statuses no HA decision can be made.
        return

    component_name_01 = role_01.get('component_name')
    component_name_02 = role_02.get('component_name')
    both_started = (component_name_01 == component_name_02
                    and role_01.get('state') == "STARTED"
                    and role_02.get('state') == "STARTED")
    if not both_started:
        return

    # ha_state may be absent from the response; treat that as "not STANDBY".
    ha_state_01 = role_01.get('ha_state')
    ha_state_02 = role_02.get('ha_state')

    # NOTE(review): the source's indentation was lost; the "state normal"
    # branch is assumed to pair with this STANDBY check -- confirm.
    if ha_state_01 == "STANDBY" and ha_state_02 == "STANDBY":
        host_name_01 = role_01.get('host_name', rm_hostname_01)
        host_name_02 = role_02.get('host_name', rm_hostname_02)
        text = ("告警对象:主机名:" + host_name_01 + ', ' + host_name_02
                + ' \n组件名称:' + component_name_01
                + " \n告警内容:HDP集群服务 " + component_name_01
                + " 高可用状态异常 " + ha_state_01 + ", 触发重启"
                + "\n告警时间:" + formatted_time)
        send_msg(text)
        print("RESOURCEMANAGER state abnormal.")
        _restart_rm(rm_hostname_01)
    else:
        print("RESOURCEMANAGER state normal.")
def send_msg(text):
    """Post a plain-text alert to the DingTalk robot webhook at ``api_url``.

    :param text: alert text placed in the message's ``content`` field
    :return: None
    """
    payload = {
        "msgtype": "text",
        "text": {"content": text},
        "at": {"atMobiles": ["1786881xxxx"]},
    }
    body = json.dumps(payload)
    response = requests.post(api_url, body, headers=headers)
    # The response body is read but its value is not used.
    response.content
# Run the ResourceManager HA check once when this module is loaded.
getHostComponentsStatus()
钉钉告警发送成功
重启其中一台 RM 的操作完成后,会触发 HA 的强制切换
需要在 ambari.properties 中添加如下配置
vi /etc/ambari-server/conf/ambari.properties
# 添加如下配置
api.csrfPrevention.enabled=false
# 重启
ambari-server restart
request请求返回的状态码