【HBase】HBaseJMX接口监控信息实现钉钉告警
- 软件开发
- 2025-09-02 06:54:02

目录
一、JMX 简介
二、JMX监控信息钉钉告警实现
一、JMX 简介
官网:Apache HBase ™ Reference Guide
JMX (Java管理扩展)提供了内置的工具,使您能够监视和管理Java VM。要启用远程系统的监视和管理,需要在启动Java VM时设置系统属性com.sun.management.jmxremote.port(希望通过该端口号启用JMX RMI连接)。
访问:
curl http://hdp-node2:16030/jmx?qry=Hadoop:service=HBase,name=RegionServer,sub=Server
输出的指标如下:
{ "beans" : [ { "name" : "Hadoop:service=HBase,name=RegionServer,sub=Server", "modelerType" : "RegionServer,sub=Server", "tag.zookeeperQuorum" : "hdp-node2:2181,hdp-node3:2181,hdp-node1:2181", "tag.serverName" : "hdp-node2,16020,1738720067137", "tag.clusterId" : "85aa06c7-b28c-41fd-aa17-a49376641751", "tag.Context" : "regionserver", "tag.Hostname" : "hdp-node2", "regionCount" : 34, "storeCount" : 51, "hlogFileCount" : 1, "hlogFileSize" : 0, "storeFileCount" : 31, "memStoreSize" : 0, "storeFileSize" : 212110208, "maxStoreFileAge" : 23910274739, "minStoreFileAge" : 595075791, "avgStoreFileAge" : 12083745007, "numReferenceFiles" : 0, "regionServerStartTime" : 1738720067137, "averageRegionSize" : 6238535, "storeFileIndexSize" : 529736, "staticIndexSize" : 1348988, "staticBloomSize" : 2438222, "mutationsWithoutWALCount" : 0, "mutationsWithoutWALSize" : 0, "percentFilesLocal" : 100.0, "percentFilesLocalSecondaryRegions" : 0.0, "splitQueueLength" : 0, "compactionQueueLength" : 0, "smallCompactionQueueLength" : 0, "largeCompactionQueueLength" : 0, "flushQueueLength" : 0, "blockCacheFreeSize" : 1716727624, "blockCacheCount" : 0, "blockCacheSize" : 1259320, "blockCacheCountHitPercent" : 0.0, "blockCacheExpressHitPercent" : 0.0, "l1CacheHitCount" : 0, "l1CacheMissCount" : 0, "l1CacheHitRatio" : 0.0, "l1CacheMissRatio" : 0.0, "l2CacheHitCount" : 0, "l2CacheMissCount" : 0, "l2CacheHitRatio" : 0.0, "l2CacheMissRatio" : 0.0, "mobFileCacheCount" : 0, "mobFileCacheHitPercent" : 0.0, "totalRequestCount" : 2, "totalRowActionRequestCount" : 0, "readRequestCount" : 0, "filteredReadRequestCount" : 0, "writeRequestCount" : 0, "rpcGetRequestCount" : 0, "rpcScanRequestCount" : 0, "rpcMultiRequestCount" : 0, "rpcMutateRequestCount" : 0, "checkMutateFailedCount" : 0, "checkMutatePassedCount" : 0, "blockCacheHitCount" : 0, "blockCacheHitCountPrimary" : 0, "blockCacheMissCount" : 0, "blockCacheMissCountPrimary" : 0, "blockCacheEvictionCount" : 0, "blockCacheEvictionCountPrimary" : 0, 
"blockCacheFailedInsertionCount" : 0, "blockCacheDataMissCount" : 0, "blockCacheLeafIndexMissCount" : 0, "blockCacheBloomChunkMissCount" : 0, "blockCacheMetaMissCount" : 0, "blockCacheRootIndexMissCount" : 0, "blockCacheIntermediateIndexMissCount" : 0, "blockCacheFileInfoMissCount" : 0, "blockCacheGeneralBloomMetaMissCount" : 0, "blockCacheDeleteFamilyBloomMissCount" : 0, "blockCacheTrailerMissCount" : 0, "blockCacheDataHitCount" : 0, "blockCacheLeafIndexHitCount" : 0, "blockCacheBloomChunkHitCount" : 0, "blockCacheMetaHitCount" : 0, "blockCacheRootIndexHitCount" : 0, "blockCacheIntermediateIndexHitCount" : 0, "blockCacheFileInfoHitCount" : 0, "blockCacheGeneralBloomMetaHitCount" : 0, "blockCacheDeleteFamilyBloomHitCount" : 0, "blockCacheTrailerHitCount" : 0, "updatesBlockedTime" : 0, "flushedCellsCount" : 0, "compactedCellsCount" : 0, "majorCompactedCellsCount" : 0, "flushedCellsSize" : 0, "compactedCellsSize" : 0, "majorCompactedCellsSize" : 0, "cellsCountCompactedFromMob" : 0, "cellsCountCompactedToMob" : 0, "cellsSizeCompactedFromMob" : 0, "cellsSizeCompactedToMob" : 0, "mobFlushCount" : 0, "mobFlushedCellsCount" : 0, "mobFlushedCellsSize" : 0, "mobScanCellsCount" : 0, "mobScanCellsSize" : 0, "mobFileCacheAccessCount" : 0, "mobFileCacheMissCount" : 0, "mobFileCacheEvictedCount" : 0, "hedgedReads" : 0, "hedgedReadWins" : 0, "blockedRequestCount" : 0, "MajorCompactionTime_num_ops" : 2, "MajorCompactionTime_min" : 0, "MajorCompactionTime_max" : 0, "MajorCompactionTime_mean" : 0, "MajorCompactionTime_25th_percentile" : 0, "MajorCompactionTime_median" : 0, "MajorCompactionTime_75th_percentile" : 0, "MajorCompactionTime_90th_percentile" : 0, "MajorCompactionTime_95th_percentile" : 0, "MajorCompactionTime_98th_percentile" : 0, "MajorCompactionTime_99th_percentile" : 0, "MajorCompactionTime_99.9th_percentile" : 0, "MajorCompactionTime_TimeRangeCount_600000-inf" : 2, "PauseTimeWithGc_num_ops" : 0, "PauseTimeWithGc_min" : 0, "PauseTimeWithGc_max" : 0, 
"PauseTimeWithGc_mean" : 0, "PauseTimeWithGc_25th_percentile" : 0, "PauseTimeWithGc_median" : 0, "PauseTimeWithGc_75th_percentile" : 0, "PauseTimeWithGc_90th_percentile" : 0, "PauseTimeWithGc_95th_percentile" : 0, "PauseTimeWithGc_98th_percentile" : 0, "PauseTimeWithGc_99th_percentile" : 0, "PauseTimeWithGc_99.9th_percentile" : 0, "compactedOutputBytes" : 8924, "pauseWarnThresholdExceeded" : 0, "ScanTime_num_ops" : 0, "ScanTime_min" : 0, "ScanTime_max" : 0, "ScanTime_mean" : 0, "ScanTime_25th_percentile" : 0, "ScanTime_median" : 0, "ScanTime_75th_percentile" : 0, "ScanTime_90th_percentile" : 0, "ScanTime_95th_percentile" : 0, "ScanTime_98th_percentile" : 0, "ScanTime_99th_percentile" : 0, "ScanTime_99.9th_percentile" : 0, "Increment_num_ops" : 0, "Increment_min" : 0, "Increment_max" : 0, "Increment_mean" : 0, "Increment_25th_percentile" : 0, "Increment_median" : 0, "Increment_75th_percentile" : 0, "Increment_90th_percentile" : 0, "Increment_95th_percentile" : 0, "Increment_98th_percentile" : 0, "Increment_99th_percentile" : 0, "Increment_99.9th_percentile" : 0, "Delete_num_ops" : 0, "Delete_min" : 0, "Delete_max" : 0, "Delete_mean" : 0, "Delete_25th_percentile" : 0, "Delete_median" : 0, "Delete_75th_percentile" : 0, "Delete_90th_percentile" : 0, "Delete_95th_percentile" : 0, "Delete_98th_percentile" : 0, "Delete_99th_percentile" : 0, "Delete_99.9th_percentile" : 0, "Put_num_ops" : 0, "Put_min" : 0, "Put_max" : 0, "Put_mean" : 0, "Put_25th_percentile" : 0, "Put_median" : 0, "Put_75th_percentile" : 0, "Put_90th_percentile" : 0, "Put_95th_percentile" : 0, "Put_98th_percentile" : 0, "Put_99th_percentile" : 0, "Put_99.9th_percentile" : 0, "DeleteBatch_num_ops" : 0, "DeleteBatch_min" : 0, "DeleteBatch_max" : 0, "DeleteBatch_mean" : 0, "DeleteBatch_25th_percentile" : 0, "DeleteBatch_median" : 0, "DeleteBatch_75th_percentile" : 0, "DeleteBatch_90th_percentile" : 0, "DeleteBatch_95th_percentile" : 0, "DeleteBatch_98th_percentile" : 0, "DeleteBatch_99th_percentile" : 0, 
"DeleteBatch_99.9th_percentile" : 0, "splitRequestCount" : 0, "FlushMemstoreSize_num_ops" : 0, "FlushMemstoreSize_min" : 0, "FlushMemstoreSize_max" : 0, "FlushMemstoreSize_mean" : 0, "FlushMemstoreSize_25th_percentile" : 0, "FlushMemstoreSize_median" : 0, "FlushMemstoreSize_75th_percentile" : 0, "FlushMemstoreSize_90th_percentile" : 0, "FlushMemstoreSize_95th_percentile" : 0, "FlushMemstoreSize_98th_percentile" : 0, "FlushMemstoreSize_99th_percentile" : 0, "FlushMemstoreSize_99.9th_percentile" : 0, "CompactionInputFileCount_num_ops" : 2, "CompactionInputFileCount_min" : 0, "CompactionInputFileCount_max" : 0, "CompactionInputFileCount_mean" : 0, "CompactionInputFileCount_25th_percentile" : 0, "CompactionInputFileCount_median" : 0, "CompactionInputFileCount_75th_percentile" : 0, "CompactionInputFileCount_90th_percentile" : 0, "CompactionInputFileCount_95th_percentile" : 0, "CompactionInputFileCount_98th_percentile" : 0, "CompactionInputFileCount_99th_percentile" : 0, "CompactionInputFileCount_99.9th_percentile" : 0, "PutBatch_num_ops" : 0, "PutBatch_min" : 0, "PutBatch_max" : 0, "PutBatch_mean" : 0, "PutBatch_25th_percentile" : 0, "PutBatch_median" : 0, "PutBatch_75th_percentile" : 0, "PutBatch_90th_percentile" : 0, "PutBatch_95th_percentile" : 0, "PutBatch_98th_percentile" : 0, "PutBatch_99th_percentile" : 0, "PutBatch_99.9th_percentile" : 0, "CompactionTime_num_ops" : 2, "CompactionTime_min" : 0, "CompactionTime_max" : 0, "CompactionTime_mean" : 0, "CompactionTime_25th_percentile" : 0, "CompactionTime_median" : 0, "CompactionTime_75th_percentile" : 0, "CompactionTime_90th_percentile" : 0, "CompactionTime_95th_percentile" : 0, "CompactionTime_98th_percentile" : 0, "CompactionTime_99th_percentile" : 0, "CompactionTime_99.9th_percentile" : 0, "CompactionTime_TimeRangeCount_600000-inf" : 2, "Get_num_ops" : 0, "Get_min" : 0, "Get_max" : 0, "Get_mean" : 0, "Get_25th_percentile" : 0, "Get_median" : 0, "Get_75th_percentile" : 0, "Get_90th_percentile" : 0, 
"Get_95th_percentile" : 0, "Get_98th_percentile" : 0, "Get_99th_percentile" : 0, "Get_99.9th_percentile" : 0, "MajorCompactionInputFileCount_num_ops" : 2, "MajorCompactionInputFileCount_min" : 0, "MajorCompactionInputFileCount_max" : 0, "MajorCompactionInputFileCount_mean" : 0, "MajorCompactionInputFileCount_25th_percentile" : 0, "MajorCompactionInputFileCount_median" : 0, "MajorCompactionInputFileCount_75th_percentile" : 0, "MajorCompactionInputFileCount_90th_percentile" : 0, "MajorCompactionInputFileCount_95th_percentile" : 0, "MajorCompactionInputFileCount_98th_percentile" : 0, "MajorCompactionInputFileCount_99th_percentile" : 0, "MajorCompactionInputFileCount_99.9th_percentile" : 0, "CheckAndPut_num_ops" : 0, "CheckAndPut_min" : 0, "CheckAndPut_max" : 0, "CheckAndPut_mean" : 0, "CheckAndPut_25th_percentile" : 0, "CheckAndPut_median" : 0, "CheckAndPut_75th_percentile" : 0, "CheckAndPut_90th_percentile" : 0, "CheckAndPut_95th_percentile" : 0, "CheckAndPut_98th_percentile" : 0, "CheckAndPut_99th_percentile" : 0, "CheckAndPut_99.9th_percentile" : 0, "SplitTime_num_ops" : 0, "SplitTime_min" : 0, "SplitTime_max" : 0, "SplitTime_mean" : 0, "SplitTime_25th_percentile" : 0, "SplitTime_median" : 0, "SplitTime_75th_percentile" : 0, "SplitTime_90th_percentile" : 0, "SplitTime_95th_percentile" : 0, "SplitTime_98th_percentile" : 0, "SplitTime_99th_percentile" : 0, "SplitTime_99.9th_percentile" : 0, "MajorCompactionOutputSize_num_ops" : 2, "MajorCompactionOutputSize_min" : 0, "MajorCompactionOutputSize_max" : 0, "MajorCompactionOutputSize_mean" : 0, "MajorCompactionOutputSize_25th_percentile" : 0, "MajorCompactionOutputSize_median" : 0, "MajorCompactionOutputSize_75th_percentile" : 0, "MajorCompactionOutputSize_90th_percentile" : 0, "MajorCompactionOutputSize_95th_percentile" : 0, "MajorCompactionOutputSize_98th_percentile" : 0, "MajorCompactionOutputSize_99th_percentile" : 0, "MajorCompactionOutputSize_99.9th_percentile" : 0, 
"MajorCompactionOutputSize_SizeRangeCount_100000000-inf" : 2, "majorCompactedInputBytes" : 8924, "slowAppendCount" : 0, "flushedOutputBytes" : 0, "CompactionOutputFileCount_num_ops" : 2, "CompactionOutputFileCount_min" : 0, "CompactionOutputFileCount_max" : 0, "CompactionOutputFileCount_mean" : 0, "CompactionOutputFileCount_25th_percentile" : 0, "CompactionOutputFileCount_median" : 0, "CompactionOutputFileCount_75th_percentile" : 0, "CompactionOutputFileCount_90th_percentile" : 0, "CompactionOutputFileCount_95th_percentile" : 0, "CompactionOutputFileCount_98th_percentile" : 0, "CompactionOutputFileCount_99th_percentile" : 0, "CompactionOutputFileCount_99.9th_percentile" : 0, "slowDeleteCount" : 0, "Replay_num_ops" : 0, "Replay_min" : 0, "Replay_max" : 0, "Replay_mean" : 0, "Replay_25th_percentile" : 0, "Replay_median" : 0, "Replay_75th_percentile" : 0, "Replay_90th_percentile" : 0, "Replay_95th_percentile" : 0, "Replay_98th_percentile" : 0, "Replay_99th_percentile" : 0, "Replay_99.9th_percentile" : 0, "FlushTime_num_ops" : 0, "FlushTime_min" : 0, "FlushTime_max" : 0, "FlushTime_mean" : 0, "FlushTime_25th_percentile" : 0, "FlushTime_median" : 0, "FlushTime_75th_percentile" : 0, "FlushTime_90th_percentile" : 0, "FlushTime_95th_percentile" : 0, "FlushTime_98th_percentile" : 0, "FlushTime_99th_percentile" : 0, "FlushTime_99.9th_percentile" : 0, "MajorCompactionInputSize_num_ops" : 2, "MajorCompactionInputSize_min" : 0, "MajorCompactionInputSize_max" : 0, "MajorCompactionInputSize_mean" : 0, "MajorCompactionInputSize_25th_percentile" : 0, "MajorCompactionInputSize_median" : 0, "MajorCompactionInputSize_75th_percentile" : 0, "MajorCompactionInputSize_90th_percentile" : 0, "MajorCompactionInputSize_95th_percentile" : 0, "MajorCompactionInputSize_98th_percentile" : 0, "MajorCompactionInputSize_99th_percentile" : 0, "MajorCompactionInputSize_99.9th_percentile" : 0, "MajorCompactionInputSize_SizeRangeCount_100000000-inf" : 2, "pauseInfoThresholdExceeded" : 0, 
"splitSuccessCount" : 0, "CheckAndDelete_num_ops" : 0, "CheckAndDelete_min" : 0, "CheckAndDelete_max" : 0, "CheckAndDelete_mean" : 0, "CheckAndDelete_25th_percentile" : 0, "CheckAndDelete_median" : 0, "CheckAndDelete_75th_percentile" : 0, "CheckAndDelete_90th_percentile" : 0, "CheckAndDelete_95th_percentile" : 0, "CheckAndDelete_98th_percentile" : 0, "CheckAndDelete_99th_percentile" : 0, "CheckAndDelete_99.9th_percentile" : 0, "CompactionInputSize_num_ops" : 2, "CompactionInputSize_min" : 0, "CompactionInputSize_max" : 0, "CompactionInputSize_mean" : 0, "CompactionInputSize_25th_percentile" : 0, "CompactionInputSize_median" : 0, "CompactionInputSize_75th_percentile" : 0, "CompactionInputSize_90th_percentile" : 0, "CompactionInputSize_95th_percentile" : 0, "CompactionInputSize_98th_percentile" : 0, "CompactionInputSize_99th_percentile" : 0, "CompactionInputSize_99.9th_percentile" : 0, "CompactionInputSize_SizeRangeCount_100000000-inf" : 2, "MajorCompactionOutputFileCount_num_ops" : 2, "MajorCompactionOutputFileCount_min" : 0, "MajorCompactionOutputFileCount_max" : 0, "MajorCompactionOutputFileCount_mean" : 0, "MajorCompactionOutputFileCount_25th_percentile" : 0, "MajorCompactionOutputFileCount_median" : 0, "MajorCompactionOutputFileCount_75th_percentile" : 0, "MajorCompactionOutputFileCount_90th_percentile" : 0, "MajorCompactionOutputFileCount_95th_percentile" : 0, "MajorCompactionOutputFileCount_98th_percentile" : 0, "MajorCompactionOutputFileCount_99th_percentile" : 0, "MajorCompactionOutputFileCount_99.9th_percentile" : 0, "ScanSize_num_ops" : 0, "ScanSize_min" : 0, "ScanSize_max" : 0, "ScanSize_mean" : 0, "ScanSize_25th_percentile" : 0, "ScanSize_median" : 0, "ScanSize_75th_percentile" : 0, "ScanSize_90th_percentile" : 0, "ScanSize_95th_percentile" : 0, "ScanSize_98th_percentile" : 0, "ScanSize_99th_percentile" : 0, "ScanSize_99.9th_percentile" : 0, "slowGetCount" : 0, "flushedMemstoreBytes" : 0, "CompactionOutputSize_num_ops" : 2, "CompactionOutputSize_min" : 
0, "CompactionOutputSize_max" : 0, "CompactionOutputSize_mean" : 0, "CompactionOutputSize_25th_percentile" : 0, "CompactionOutputSize_median" : 0, "CompactionOutputSize_75th_percentile" : 0, "CompactionOutputSize_90th_percentile" : 0, "CompactionOutputSize_95th_percentile" : 0, "CompactionOutputSize_98th_percentile" : 0, "CompactionOutputSize_99th_percentile" : 0, "CompactionOutputSize_99.9th_percentile" : 0, "CompactionOutputSize_SizeRangeCount_100000000-inf" : 2, "majorCompactedOutputBytes" : 8924, "PauseTimeWithoutGc_num_ops" : 0, "PauseTimeWithoutGc_min" : 0, "PauseTimeWithoutGc_max" : 0, "PauseTimeWithoutGc_mean" : 0, "PauseTimeWithoutGc_25th_percentile" : 0, "PauseTimeWithoutGc_median" : 0, "PauseTimeWithoutGc_75th_percentile" : 0, "PauseTimeWithoutGc_90th_percentile" : 0, "PauseTimeWithoutGc_95th_percentile" : 0, "PauseTimeWithoutGc_98th_percentile" : 0, "PauseTimeWithoutGc_99th_percentile" : 0, "PauseTimeWithoutGc_99.9th_percentile" : 0, "slowPutCount" : 0, "slowIncrementCount" : 0, "compactedInputBytes" : 8924, "Append_num_ops" : 0, "Append_min" : 0, "Append_max" : 0, "Append_mean" : 0, "Append_25th_percentile" : 0, "Append_median" : 0, "Append_75th_percentile" : 0, "Append_90th_percentile" : 0, "Append_95th_percentile" : 0, "Append_98th_percentile" : 0, "Append_99th_percentile" : 0, "Append_99.9th_percentile" : 0, "FlushOutputSize_num_ops" : 0, "FlushOutputSize_min" : 0, "FlushOutputSize_max" : 0, "FlushOutputSize_mean" : 0, "FlushOutputSize_25th_percentile" : 0, "FlushOutputSize_median" : 0, "FlushOutputSize_75th_percentile" : 0, "FlushOutputSize_90th_percentile" : 0, "FlushOutputSize_95th_percentile" : 0, "FlushOutputSize_98th_percentile" : 0, "FlushOutputSize_99th_percentile" : 0, "FlushOutputSize_99.9th_percentile" : 0, "Bulkload_count" : 0, "Bulkload_mean_rate" : 0.0, "Bulkload_1min_rate" : 0.0, "Bulkload_5min_rate" : 0.0, "Bulkload_15min_rate" : 0.0, "Bulkload_num_ops" : 0, "Bulkload_min" : 0, "Bulkload_max" : 0, "Bulkload_mean" : 0, 
"Bulkload_25th_percentile" : 0, "Bulkload_median" : 0, "Bulkload_75th_percentile" : 0, "Bulkload_90th_percentile" : 0, "Bulkload_95th_percentile" : 0, "Bulkload_98th_percentile" : 0, "Bulkload_99th_percentile" : 0, "Bulkload_99.9th_percentile" : 0 } ] }
以上输出主要是 HBase 中某个 RegionServer 的详细监控信息,包括 GC、scan、flush、block cache、compaction 等细粒度的指标。
二、JMX监控信息钉钉告警实现
下面我们基于 RegionServer 的运行时长,实现一个钉钉告警通知脚本:
# -*- coding: utf-8 -*-
"""
Monitor HBase RegionServer uptime through the JMX HTTP endpoint and push
DingTalk notifications.

author: kangll
date: 2025/02/11 11:50

The metrics come from the endpoint that can also be queried by hand:

    curl http://hdp-node2:16030/jmx?qry=Hadoop:service=HBase,name=RegionServer,sub=Server

Behavior:
  * every hour, on the hour, a summary with the uptime of every RegionServer
    is sent;
  * every 10 seconds each RegionServer is polled; when its uptime is below
    10 minutes (it was just restarted) an alert is sent immediately, at most
    once per host per clock hour.
"""
import json
import time

import requests
import schedule

__author__ = 'kanglilong <kangll@winnerinf >'

headers = {'Content-Type': 'application/json;charset=utf-8'}
hostArr = {"hdp-node1", "hdp-node2", "hdp-node3"}
# The scheme and ".com" had been stripped from this URL in the published
# listing, which made every POST fail; they are restored here.
# NOTE(review): the access token is a credential -- prefer loading it from
# configuration instead of keeping it in source.
dingding_url = "https://oapi.dingtalk.com/robot/send?access_token=ba7693ae5a1a5a4cda1358f35b19785a6d8a7659da92ba3685d6532994a6d82c"

# RegionServer info (HTTP) port that serves /jmx.
JMX_PORT = 16030
# Without a timeout a single unresponsive host would block the polling loop.
REQUEST_TIMEOUT_SECONDS = 10
# A RegionServer whose uptime is below this is reported as restarted.
RESTART_WINDOW_MS = 10 * 60 * 1000
# Threshold kept from the original script: beans with this many attributes or
# fewer are ignored. Presumably this skips a partially populated bean while
# the RegionServer is still starting -- TODO confirm.
MIN_BEAN_ATTRIBUTES = 400

# host -> "YYYY-mm-dd HH" of the last restart alert sent for that host.
# Keyed per host and per calendar hour so that one host's alert does not
# suppress another host's, nor an alert at the same hour on a later day.
last_less_than_10mins_alert_hour = {}


def _fetch_server_bean(regionserver_host):
    """
    Fetch the RegionServer "sub=Server" JMX bean of one host.

    :param regionserver_host: host name of the RegionServer
    :return: the first bean of the JSON response (a dict of metrics)
    :raises requests.exceptions.RequestException: network error or HTTP error status
    :raises KeyError, IndexError, ValueError: unexpected response body
    """
    jmx_url = f'http://{regionserver_host}:{JMX_PORT}/jmx?qry=Hadoop:service=HBase,name=RegionServer,sub=Server'
    response = requests.get(jmx_url, timeout=REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()
    return response.json()['beans'][0]


def _uptime_ms(bean):
    """Return now minus the bean's regionServerStartTime, in milliseconds."""
    return int(time.time() * 1000) - bean['regionServerStartTime']


def jmxGetHBaseStatus(regionserver_host):
    """
    Build the uptime line of one RegionServer for the hourly summary.

    :param regionserver_host: host name of the RegionServer
    :return: "hostname: ..., RegionServer uptime: ..." or None when the JMX
             data could not be fetched or parsed (the error is printed)
    """
    try:
        bean = _fetch_server_bean(regionserver_host)
        uptime_hms = convert_milliseconds_to_hms(_uptime_ms(bean))
    except requests.exceptions.RequestException as e:
        print(f'请求出错: {e}')
        return None
    except (KeyError, IndexError, TypeError, ValueError) as e:
        print(f'解析 JMX 数据出错: {e}')
        return None
    return f"hostname: {regionserver_host}, RegionServer uptime: {uptime_hms}"


def jmxGetHBaseAlarmStatus(regionserver_host):
    """
    Send a restart alert when a RegionServer has been up for less than 10 minutes.

    At most one alert is sent per host per clock hour. The hour is recorded
    only after the alert was delivered, so a failed delivery is retried on
    the next poll.

    :param regionserver_host: host name of the RegionServer
    :return: the alert text when an alert was sent, "" when no alert was
             needed, None when the JMX request, the parsing or the delivery
             failed
    """
    formatted_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
    try:
        bean = _fetch_server_bean(regionserver_host)
        if len(bean) <= MIN_BEAN_ATTRIBUTES:
            return ""
        uptime = _uptime_ms(bean)
    except requests.exceptions.RequestException as e:
        print(f'请求出错: {e}')
        return None
    except (KeyError, IndexError, TypeError, ValueError) as e:
        print(f'解析 JMX 数据出错: {e}')
        return None

    if uptime >= RESTART_WINDOW_MS:
        return ""

    # The original referenced `current_hour` without ever assigning it, which
    # raised NameError exactly when an alert was due.
    current_hour = time.strftime('%Y-%m-%d %H', time.localtime())
    if last_less_than_10mins_alert_hour.get(regionserver_host) == current_hour:
        return ""

    uptime_hms = convert_milliseconds_to_hms(uptime)
    text = "告警类型: reid 集群HBase 重启告警通知 \n" + "告警信息: \n" + f"hostname: {regionserver_host} ,RegionServer uptime: {uptime_hms} " + "\n告警时间:" + formatted_time
    if not msg(text, dingding_url):
        return None
    last_less_than_10mins_alert_hour[regionserver_host] = current_hour
    return text


def convert_milliseconds_to_hms(milliseconds):
    """
    Format a duration given in milliseconds as hours / minutes / seconds.

    :param milliseconds: duration in milliseconds; negative values (e.g. from
                         clock skew between hosts) are treated as 0
    :return: string such as "1小时 2分钟 3秒."
    """
    total_seconds = max(int(milliseconds), 0) // 1000
    hours, remainder = divmod(total_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"{hours}小时 {minutes}分钟 {seconds}秒."


def getAllHostsHBase(alert_message=""):
    """
    Send the summary with the uptime of every RegionServer in hostArr.

    :param alert_message: optional text placed in front of the summary
    :return: None
    """
    formatted_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
    lines = []
    for count, host in enumerate(sorted(hostArr), start=1):
        status = jmxGetHBaseStatus(host)
        if status is None:
            # Name the failing host instead of printing the literal "None".
            status = f"hostname: {host}, RegionServer JMX 请求失败"
        lines.append("\t" + str(count) + "." + status + "\n")
    alert_message += "告警类型: reid 集群HBase告警通知 \n" + "告警信息: \n" + "".join(lines)
    alert_message += "\n告警时间:" + formatted_time
    print(alert_message)
    notify_msg(alert_message, dingding_url)


def check_and_alert():
    """
    Poll every RegionServer once and log whether a restart alert was sent.

    :return: None
    """
    formatted_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
    for host in sorted(hostArr):
        alarm_str = jmxGetHBaseAlarmStatus(host)
        if alarm_str:
            print("时间: ", formatted_time, "主机:", host, 'RegionServer 重启告警发出!')
        else:
            print("时间: ", formatted_time, "主机:", host, 'RegionServer 状 态 正 常!')


def _post_dingtalk_text(text, api_url):
    """
    POST a plain-text message to a DingTalk robot webhook.

    Delivery errors are printed instead of raised so that a DingTalk outage
    does not stop the monitoring loop.

    :param text: message text
    :param api_url: DingTalk webhook URL
    :return: True when the HTTP request succeeded, False otherwise
    """
    json_text = {
        "msgtype": "text",
        "text": {"content": text},
        # Placeholder kept from the original script; put mobile numbers here
        # to @-mention people.
        "at": {"atMobiles": [""]},
    }
    try:
        response = requests.post(api_url, data=json.dumps(json_text), headers=headers,
                                 timeout=REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f'钉钉消息发送失败: {e}')
        return False
    return True


def msg(text, api_url):
    """
    Send an alert intended for the responsible person.

    :param text: alert text
    :param api_url: DingTalk webhook URL
    :return: True when the message was delivered, False otherwise
    """
    return _post_dingtalk_text(text, api_url)


def notify_msg(text, api_url):
    """
    Send a regular notification.

    :param text: notification text
    :param api_url: DingTalk webhook URL
    :return: True when the message was delivered, False otherwise
    """
    return _post_dingtalk_text(text, api_url)


def correct_msg(text, api_url):
    """
    Send a "component is healthy" message without @-mentioning anyone.

    :param text: message text
    :param api_url: DingTalk webhook URL
    :return: True when the message was delivered, False otherwise
    """
    return _post_dingtalk_text(text, api_url)


if __name__ == '__main__':
    # Hourly summary, on the hour.
    schedule.every().hour.at(":00").do(getAllHostsHBase)
    while True:
        check_and_alert()
        schedule.run_pending()
        time.sleep(10)

# Article caption that followed the listing (kept verbatim): 钉钉告警通知:
【HBase】HBaseJMX接口监控信息实现钉钉告警由讯客互联软件开发栏目发布,感谢您对讯客互联的认可,以及对我们原创作品以及文章的青睐,非常欢迎各位朋友分享到个人网站或者朋友圈,但转载请说明文章出处“【HBase】HBaseJMX接口监控信息实现钉钉告警”
下一篇
什么是逻辑分析仪?