MonitorService.php 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426
  1. <?php
  2. namespace App\Module\ThirdParty\Services;
  3. use App\Module\ThirdParty\Models\ThirdPartyService;
  4. use App\Module\ThirdParty\Models\ThirdPartyMonitor;
  5. use App\Module\ThirdParty\Logics\ServiceLogic;
  6. use Illuminate\Support\Collection;
  7. use Illuminate\Support\Facades\Http;
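/**
 * Monitoring service for third-party services: runs health checks, records
 * their results, and reports on the recorded monitoring history.
 */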
  11. class MonitorService
  12. {
  13. /**
  14. * 执行服务健康检查
  15. *
  16. * @param int|null $serviceId 服务ID,为null时检查所有服务
  17. * @return array
  18. */
  19. public static function performHealthCheck(?int $serviceId = null): array
  20. {
  21. $results = [];
  22. if ($serviceId) {
  23. $services = [ThirdPartyService::findOrFail($serviceId)];
  24. } else {
  25. $services = ThirdPartyService::where('status', 'ACTIVE')
  26. ->whereNotNull('health_check_url')
  27. ->get();
  28. }
  29. foreach ($services as $service) {
  30. $result = static::checkSingleService($service);
  31. $results[] = $result;
  32. // 记录监控结果
  33. static::recordMonitorResult($service, 'health', $result);
  34. // 更新服务健康状态
  35. $service->updateHealthStatus($result['status']);
  36. }
  37. return $results;
  38. }
  39. /**
  40. * 检查单个服务
  41. *
  42. * @param ThirdPartyService $service
  43. * @return array
  44. */
  45. protected static function checkSingleService(ThirdPartyService $service): array
  46. {
  47. $result = [
  48. 'service_id' => $service->id,
  49. 'service_code' => $service->code,
  50. 'service_name' => $service->name,
  51. 'status' => ThirdPartyMonitor::STATUS_UNKNOWN,
  52. 'response_time' => null,
  53. 'status_code' => null,
  54. 'error_message' => null,
  55. 'details' => [],
  56. ];
  57. try {
  58. // 检查服务基本状态
  59. $healthCheck = ServiceLogic::checkServiceHealth($service);
  60. if ($healthCheck['status'] !== 'healthy') {
  61. $result['status'] = ThirdPartyMonitor::STATUS_ERROR;
  62. $result['error_message'] = $healthCheck['message'];
  63. $result['details'] = $healthCheck['details'];
  64. return $result;
  65. }
  66. // 如果有健康检查URL,执行HTTP检查
  67. if ($service->health_check_url) {
  68. $httpResult = static::performHttpCheck($service);
  69. $result = array_merge($result, $httpResult);
  70. } else {
  71. // 没有健康检查URL,基于服务状态判断
  72. $result['status'] = ThirdPartyMonitor::STATUS_SUCCESS;
  73. $result['details']['message'] = '服务状态正常,无HTTP健康检查';
  74. }
  75. } catch (\Exception $e) {
  76. $result['status'] = ThirdPartyMonitor::STATUS_ERROR;
  77. $result['error_message'] = $e->getMessage();
  78. }
  79. return $result;
  80. }
  81. /**
  82. * 执行HTTP健康检查
  83. *
  84. * @param ThirdPartyService $service
  85. * @return array
  86. */
  87. protected static function performHttpCheck(ThirdPartyService $service): array
  88. {
  89. $result = [
  90. 'status' => ThirdPartyMonitor::STATUS_UNKNOWN,
  91. 'response_time' => null,
  92. 'status_code' => null,
  93. 'error_message' => null,
  94. 'details' => [],
  95. ];
  96. $startTime = microtime(true);
  97. try {
  98. $timeout = config('thirdparty.monitoring.health_check.timeout', 10);
  99. $response = Http::timeout($timeout)
  100. ->get($service->health_check_url);
  101. $responseTime = (int)((microtime(true) - $startTime) * 1000);
  102. $result['response_time'] = $responseTime;
  103. $result['status_code'] = $response->status();
  104. // 判断响应状态
  105. if ($response->successful()) {
  106. $result['status'] = ThirdPartyMonitor::STATUS_SUCCESS;
  107. // 检查响应时间
  108. $slowThreshold = config('thirdparty.monitoring.performance.slow_threshold', 2000);
  109. if ($responseTime > $slowThreshold) {
  110. $result['status'] = ThirdPartyMonitor::STATUS_WARNING;
  111. $result['details']['warning'] = "响应时间 {$responseTime}ms 超过阈值 {$slowThreshold}ms";
  112. }
  113. } else {
  114. $result['status'] = ThirdPartyMonitor::STATUS_ERROR;
  115. $result['error_message'] = "HTTP状态码: {$response->status()}";
  116. }
  117. $result['details']['response_body'] = $response->body();
  118. } catch (\Illuminate\Http\Client\ConnectionException $e) {
  119. $responseTime = (int)((microtime(true) - $startTime) * 1000);
  120. $result['response_time'] = $responseTime;
  121. $result['status'] = ThirdPartyMonitor::STATUS_TIMEOUT;
  122. $result['error_message'] = '连接超时: ' . $e->getMessage();
  123. } catch (\Exception $e) {
  124. $responseTime = (int)((microtime(true) - $startTime) * 1000);
  125. $result['response_time'] = $responseTime;
  126. $result['status'] = ThirdPartyMonitor::STATUS_ERROR;
  127. $result['error_message'] = $e->getMessage();
  128. }
  129. return $result;
  130. }
  131. /**
  132. * 记录监控结果
  133. *
  134. * @param ThirdPartyService $service
  135. * @param string $checkType
  136. * @param array $result
  137. * @return ThirdPartyMonitor
  138. */
  139. public static function recordMonitorResult(ThirdPartyService $service, string $checkType, array $result): ThirdPartyMonitor
  140. {
  141. return ThirdPartyMonitor::createMonitor([
  142. 'service_id' => $service->id,
  143. 'check_type' => $checkType,
  144. 'status' => $result['status'],
  145. 'response_time' => $result['response_time'],
  146. 'status_code' => $result['status_code'],
  147. 'error_message' => $result['error_message'],
  148. 'details' => $result['details'] ?? [],
  149. ]);
  150. }
  151. /**
  152. * 获取服务监控历史
  153. *
  154. * @param int $serviceId
  155. * @param array $options
  156. * @return Collection
  157. */
  158. public static function getServiceMonitorHistory(int $serviceId, array $options = []): Collection
  159. {
  160. $query = ThirdPartyMonitor::where('service_id', $serviceId);
  161. // 时间范围过滤
  162. if (isset($options['start_date'])) {
  163. $query->where('checked_at', '>=', $options['start_date']);
  164. }
  165. if (isset($options['end_date'])) {
  166. $query->where('checked_at', '<=', $options['end_date']);
  167. }
  168. // 检查类型过滤
  169. if (isset($options['check_type'])) {
  170. $query->where('check_type', $options['check_type']);
  171. }
  172. // 状态过滤
  173. if (isset($options['status'])) {
  174. $query->where('status', $options['status']);
  175. }
  176. // 排序和限制
  177. $limit = $options['limit'] ?? 100;
  178. $query->orderBy('checked_at', 'desc')->limit($limit);
  179. return $query->get();
  180. }
  181. /**
  182. * 获取监控统计信息
  183. *
  184. * @param int|null $serviceId
  185. * @param array $options
  186. * @return array
  187. */
  188. public static function getMonitorStats(?int $serviceId = null, array $options = []): array
  189. {
  190. $query = ThirdPartyMonitor::query();
  191. if ($serviceId) {
  192. $query->where('service_id', $serviceId);
  193. }
  194. // 时间范围
  195. $startDate = $options['start_date'] ?? now()->subDays(7);
  196. $endDate = $options['end_date'] ?? now();
  197. $query->whereBetween('checked_at', [$startDate, $endDate]);
  198. $total = $query->count();
  199. $successful = $query->where('status', ThirdPartyMonitor::STATUS_SUCCESS)->count();
  200. $warnings = $query->where('status', ThirdPartyMonitor::STATUS_WARNING)->count();
  201. $errors = $query->whereIn('status', [
  202. ThirdPartyMonitor::STATUS_ERROR,
  203. ThirdPartyMonitor::STATUS_TIMEOUT
  204. ])->count();
  205. // 平均响应时间
  206. $avgResponseTime = $query->whereNotNull('response_time')
  207. ->avg('response_time');
  208. // 可用性计算
  209. $availability = $total > 0 ? round(($successful / $total) * 100, 2) : 0;
  210. // 按状态统计
  211. $statusStats = $query->selectRaw('status, COUNT(*) as count')
  212. ->groupBy('status')
  213. ->pluck('count', 'status')
  214. ->toArray();
  215. // 按检查类型统计
  216. $typeStats = $query->selectRaw('check_type, COUNT(*) as count')
  217. ->groupBy('check_type')
  218. ->pluck('count', 'check_type')
  219. ->toArray();
  220. return [
  221. 'total_checks' => $total,
  222. 'successful' => $successful,
  223. 'warnings' => $warnings,
  224. 'errors' => $errors,
  225. 'availability' => $availability,
  226. 'avg_response_time' => $avgResponseTime ? round($avgResponseTime, 2) : null,
  227. 'by_status' => $statusStats,
  228. 'by_type' => $typeStats,
  229. 'period' => [
  230. 'start' => $startDate->toDateTimeString(),
  231. 'end' => $endDate->toDateTimeString(),
  232. ],
  233. ];
  234. }
  235. /**
  236. * 获取服务可用性报告
  237. *
  238. * @param int $serviceId
  239. * @param int $days
  240. * @return array
  241. */
  242. public static function getAvailabilityReport(int $serviceId, int $days = 30): array
  243. {
  244. $service = ThirdPartyService::findOrFail($serviceId);
  245. $startDate = now()->subDays($days);
  246. $monitors = ThirdPartyMonitor::where('service_id', $serviceId)
  247. ->where('checked_at', '>=', $startDate)
  248. ->orderBy('checked_at')
  249. ->get();
  250. $totalChecks = $monitors->count();
  251. $successfulChecks = $monitors->where('status', ThirdPartyMonitor::STATUS_SUCCESS)->count();
  252. $availability = $totalChecks > 0 ? round(($successfulChecks / $totalChecks) * 100, 2) : 0;
  253. // 按天统计
  254. $dailyStats = $monitors->groupBy(function ($monitor) {
  255. return $monitor->checked_at->format('Y-m-d');
  256. })->map(function ($dayMonitors) {
  257. $total = $dayMonitors->count();
  258. $successful = $dayMonitors->where('status', ThirdPartyMonitor::STATUS_SUCCESS)->count();
  259. $avgResponseTime = $dayMonitors->whereNotNull('response_time')->avg('response_time');
  260. return [
  261. 'total_checks' => $total,
  262. 'successful_checks' => $successful,
  263. 'availability' => $total > 0 ? round(($successful / $total) * 100, 2) : 0,
  264. 'avg_response_time' => $avgResponseTime ? round($avgResponseTime, 2) : null,
  265. ];
  266. });
  267. // 故障时间段
  268. $downtimes = [];
  269. $currentDowntime = null;
  270. foreach ($monitors as $monitor) {
  271. if ($monitor->status !== ThirdPartyMonitor::STATUS_SUCCESS) {
  272. if (!$currentDowntime) {
  273. $currentDowntime = [
  274. 'start' => $monitor->checked_at,
  275. 'end' => $monitor->checked_at,
  276. 'duration' => 0,
  277. 'reason' => $monitor->error_message,
  278. ];
  279. } else {
  280. $currentDowntime['end'] = $monitor->checked_at;
  281. }
  282. } else {
  283. if ($currentDowntime) {
  284. $currentDowntime['duration'] = $currentDowntime['end']->diffInMinutes($currentDowntime['start']);
  285. $downtimes[] = $currentDowntime;
  286. $currentDowntime = null;
  287. }
  288. }
  289. }
  290. // 如果最后还有未结束的故障
  291. if ($currentDowntime) {
  292. $currentDowntime['end'] = now();
  293. $currentDowntime['duration'] = $currentDowntime['end']->diffInMinutes($currentDowntime['start']);
  294. $downtimes[] = $currentDowntime;
  295. }
  296. return [
  297. 'service' => [
  298. 'id' => $service->id,
  299. 'name' => $service->name,
  300. 'code' => $service->code,
  301. ],
  302. 'period' => [
  303. 'days' => $days,
  304. 'start' => $startDate->toDateTimeString(),
  305. 'end' => now()->toDateTimeString(),
  306. ],
  307. 'summary' => [
  308. 'total_checks' => $totalChecks,
  309. 'successful_checks' => $successfulChecks,
  310. 'availability' => $availability,
  311. 'downtime_count' => count($downtimes),
  312. 'total_downtime_minutes' => array_sum(array_column($downtimes, 'duration')),
  313. ],
  314. 'daily_stats' => $dailyStats,
  315. 'downtimes' => $downtimes,
  316. ];
  317. }
  318. /**
  319. * 清理旧的监控记录
  320. *
  321. * @param int $days
  322. * @return int
  323. */
  324. public static function cleanupOldMonitorRecords(int $days = 90): int
  325. {
  326. return ThirdPartyMonitor::where('checked_at', '<', now()->subDays($days))->delete();
  327. }
  328. /**
  329. * 获取需要关注的服务
  330. *
  331. * @return array
  332. */
  333. public static function getServicesNeedingAttention(): array
  334. {
  335. $services = ThirdPartyService::where('status', 'ACTIVE')->get();
  336. $needingAttention = [];
  337. foreach ($services as $service) {
  338. $issues = [];
  339. // 检查最近的监控记录
  340. $recentMonitor = ThirdPartyMonitor::where('service_id', $service->id)
  341. ->orderBy('checked_at', 'desc')
  342. ->first();
  343. if (!$recentMonitor) {
  344. $issues[] = '缺少监控数据';
  345. } elseif ($recentMonitor->status !== ThirdPartyMonitor::STATUS_SUCCESS) {
  346. $issues[] = '最近检查失败: ' . $recentMonitor->error_message;
  347. }
  348. // 检查健康检查间隔
  349. if ($service->needsHealthCheck()) {
  350. $issues[] = '需要健康检查';
  351. }
  352. // 检查凭证状态
  353. $credential = $service->getActiveCredential();
  354. if (!$credential) {
  355. $issues[] = '缺少活跃凭证';
  356. } elseif ($credential->isExpiringSoon()) {
  357. $issues[] = '凭证即将过期';
  358. }
  359. if (!empty($issues)) {
  360. $needingAttention[] = [
  361. 'service' => $service,
  362. 'issues' => $issues,
  363. ];
  364. }
  365. }
  366. return $needingAttention;
  367. }
  368. }