阿里云自带的日志分析功能比较分散(主要在“资源监控”和“运营报表”两处),且分布在不同的 Tab 页,很不直观,于是有了分析离线日志的需求。下面的程序由 DeepSeek 编写,对话详情见《阿里云CDN日志分析系统开发》。
一、文件目录
准备好相应文件(夹)备用
cdn_log_analyzer/
├── app.py              # 主程序
├── run.bat             # 启动脚本
└── templates/
    └── index.html      # 前端模板
二、代码详情
app.py
import os
import sys
import re
import json
import pandas as pd
from collections import Counter
from datetime import datetime
from flask import Flask, request, jsonify, render_template, send_from_directory
from werkzeug.utils import secure_filename
import logging
import zipfile
import gzip
import webbrowser
import threading
import time
def get_base_path():
    """Return the directory the application should treat as its home.

    When the interpreter reports ``sys.frozen`` (a packaged executable), the
    directory of the executable is used; otherwise the directory containing
    this source file is used.
    """
    is_frozen = getattr(sys, 'frozen', False)
    anchor = sys.executable if is_frozen else os.path.abspath(__file__)
    return os.path.dirname(anchor)
# Logging: write to a file next to the application and mirror to the console.
# The file handler is opened as UTF-8 explicitly: the messages logged by this
# module contain Chinese text and fragments of raw log lines, and the platform
# default encoding is not guaranteed to be able to represent them.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(
            os.path.join(get_base_path(), 'cdn_analyzer.log'),
            encoding='utf-8',
        ),
        logging.StreamHandler(),
    ],
)
logger = logging.getLogger(__name__)

app = Flask(__name__)

# Directory layout, all relative to the application base directory.
BASE_DIR = get_base_path()
app.config['MAX_CONTENT_LENGTH'] = 500 * 1024 * 1024  # reject request bodies above 500 MB
app.config['UPLOAD_FOLDER'] = os.path.join(BASE_DIR, 'uploads')
app.config['ANALYSIS_FOLDER'] = os.path.join(BASE_DIR, 'analysis_results')
app.config['ALLOWED_EXTENSIONS'] = {'log', 'txt', 'gz', 'zip'}

# Make sure the working directories exist before the first request arrives.
for folder in [app.config['UPLOAD_FOLDER'], app.config['ANALYSIS_FOLDER']]:
    os.makedirs(folder, exist_ok=True)
# Human-readable (Chinese) description for each HTTP status code shown in the
# report. Keys are strings because status codes are read from the log as text.
HTTP_STATUS_CODES = {
    # 2xx
    '200': '成功',
    '201': '已创建',
    '204': '无内容',
    '206': '部分内容',
    # 3xx
    '301': '永久重定向',
    '302': '临时重定向',
    '304': '未修改',
    # 4xx
    '400': '错误请求',
    '401': '未授权',
    '403': '禁止访问',
    '404': '未找到',
    '405': '方法不允许',
    '408': '请求超时',
    '429': '请求过多',
    '499': '客户端关闭连接',
    # 5xx
    '500': '服务器内部错误',
    '502': '错误网关',
    '503': '服务不可用',
    '504': '网关超时',
}
def allowed_file(filename):
    """Return True when *filename* has an extension listed in ALLOWED_EXTENSIONS.

    The extension is the text after the last dot, compared case-insensitively.
    Names without a dot are rejected.
    """
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1].lower()
    return extension in app.config['ALLOWED_EXTENSIONS']
def parse_cdn_log_line(line):
    """Parse one line of an Alibaba Cloud CDN offline log into a dict.

    Expected layout (13 whitespace-separated fields, some quoted):
    [time] client_ip proxy_ip response_time "referer" "method url" status
    request_size response_size hit_miss "user_agent" "content_type" access_ip

    Returns a dict of string fields, or None for blank lines, comment lines
    (starting with '#') and lines that cannot be parsed. Lines that do not
    match the main pattern are handed to parse_cdn_log_line_alternative().
    """
    line = line.strip()
    if not line or line.startswith('#'):
        return None

    try:
        matched = re.match(r'^\[(.*?)\]\s+(.*?)\s+(.*?)\s+(.*?)\s+"(.*?)"\s+"(.*?)"\s+(.*?)\s+(.*?)\s+(.*?)\s+(.*?)\s+"(.*?)"\s+"(.*?)"\s+(.*?)$', line)
        if not matched:
            # Fall back to the tokenizer-based parser.
            return parse_cdn_log_line_alternative(line)

        (timestamp, client_ip, proxy_ip, response_time, referer, request_field,
         status, request_size, response_size, hit_miss, user_agent,
         content_type, access_ip) = matched.groups()

        # The request field is "<METHOD> <URL>"; the URL itself may contain spaces.
        tokens = request_field.strip().split()
        if len(tokens) < 2:
            method, url = request_field, ''
        else:
            method, url = tokens[0], ' '.join(tokens[1:])

        return {
            'time': timestamp,
            'client_ip': client_ip,
            'proxy_ip': proxy_ip,
            'response_time': response_time,
            'referer': referer,
            'method': method,
            'url': url,
            'status': status,
            'request_size': request_size,
            'response_size': response_size,
            'hit_miss': hit_miss,
            'user_agent': user_agent,
            'content_type': content_type,
            'access_ip': access_ip,
        }
    except Exception as e:
        logger.warning(f"解析日志行失败: {line[:100]}... 错误: {str(e)}")
        return None
def _split_log_fields(line):
    """Split a log line on spaces that lie outside "..." and [...] groups."""
    fields = []
    current = []
    in_quote = False
    in_bracket = False
    for char in line:
        if char == '"' and not in_bracket:
            in_quote = not in_quote
            current.append(char)
        elif char == '[' and not in_quote and not current:
            # A bracket only opens a group at the start of a field, so a '['
            # inside a URL or other value is kept as ordinary text.
            in_bracket = True
            current.append(char)
        elif char == ']' and in_bracket:
            in_bracket = False
            current.append(char)
        elif char == ' ' and not in_quote and not in_bracket:
            if current:
                fields.append(''.join(current))
                current = []
        else:
            current.append(char)
    if current:
        fields.append(''.join(current))
    return fields


def parse_cdn_log_line_alternative(line):
    """Fallback parser for lines the main regular expression rejects.

    The line is tokenized on spaces, keeping quoted values and the bracketed
    timestamp ("[8/Jan/2025:20:16:54 +0800]") together as single fields.
    Previously the timestamp was split at its inner space, which shifted
    every following field by one position.

    At least 12 fields (everything up to the content type) are required; the
    trailing access IP is optional and defaults to ''.

    Returns a dict with the same keys as parse_cdn_log_line(), or None when
    the line has too few fields or parsing raises.
    """
    try:
        parts = _split_log_fields(line)
        if len(parts) < 12:
            return None

        # Field 6 (index 5) is the quoted "<METHOD> <URL>" request.
        request_field = parts[5].strip('"')
        request_parts = request_field.split()
        if len(request_parts) >= 2:
            method = request_parts[0]
            url = ' '.join(request_parts[1:])
        else:
            method = request_field
            url = ''

        return {
            'time': parts[0].strip('[]'),
            'client_ip': parts[1],
            'proxy_ip': parts[2],
            'response_time': parts[3],
            'referer': parts[4].strip('"'),
            'method': method,
            'url': url,
            'status': parts[6],
            'request_size': parts[7],
            'response_size': parts[8],
            'hit_miss': parts[9],
            'user_agent': parts[10].strip('"'),
            'content_type': parts[11].strip('"'),
            'access_ip': parts[12] if len(parts) > 12 else '',
        }
    except Exception as e:
        # Best-effort fallback: a malformed line is reported and skipped.
        logger.warning(f"备选解析方法失败: {str(e)}")
        return None
def detect_browser_from_ua(ua_string):
    """Classify a User-Agent string into a coarse browser/client label.

    Returns '未知' for an empty or '-' value, '爬虫' when a crawler/tooling
    keyword is present, one of the known browser/client names, or '其他'.

    Chromium-based browsers (Edge, Opera, Vivaldi, Brave) advertise
    "Chrome/..." in their User-Agent, so they are tested before the generic
    Chrome rule; otherwise they would all be reported as Chrome.
    """
    if not ua_string or ua_string == '-':
        return '未知'

    ua_lower = ua_string.lower()

    # Crawlers, HTTP libraries and headless clients.
    bot_keywords = ('bot', 'spider', 'crawler', 'scanner', 'sitemap', 'feed',
                    'python', 'java/', 'php/', 'ruby/', 'go-http', 'okhttp', 'headless')
    if any(keyword in ua_lower for keyword in bot_keywords):
        return '爬虫'

    # Chromium derivatives first: their tokens appear alongside "chrome".
    chromium_derivatives = (
        ('Edge', ('edge', 'edg/', 'edga/', 'edgios/')),
        ('Opera', ('opera', 'opr/')),
        ('Vivaldi', ('vivaldi',)),
        ('Brave', ('brave',)),
    )
    for label, markers in chromium_derivatives:
        if any(marker in ua_lower for marker in markers):
            return label

    has_chrome = 'chrome' in ua_lower
    has_chromium = 'chromium' in ua_lower
    if has_chrome and not has_chromium:
        return 'Chrome'
    if 'firefox' in ua_lower:
        return 'Firefox'
    # Chrome also sends a "Safari/..." token, so exclude it here.
    if 'safari' in ua_lower and not has_chrome and not has_chromium:
        return 'Safari'
    if 'msie' in ua_lower or 'trident' in ua_lower:
        return 'IE'
    if 'curl' in ua_lower:
        return 'cURL'
    if 'wget' in ua_lower:
        return 'Wget'
    return '其他'
# Extensions that are reported under a shared label. Every other extension of
# six characters or fewer is reported as-is.
_EXTENSION_ALIASES = {
    'jpeg': 'jpg',
    'htm': 'html',
    'docx': 'doc',
    'xlsx': 'xls',
    'pptx': 'ppt',
    'woff2': 'woff',
}


def extract_file_extension(url):
    """Classify a request URL by the extension of its last path segment.

    Returns:
        '无'    for an empty or '-' URL, or a path with no recognizable type;
        the (alias-normalized, lower-cased) extension when the last path
        segment has one of at most six characters;
        '未知'  when the extension is longer than six characters;
        '目录'  when the path ends with '/';
        'API'   when the path contains '/api/' or starts with '/api'.

    Only the path is inspected. The scheme, host, query string and fragment
    are removed first, so a dot in the host name (e.g. "http://example.com/")
    is no longer mistaken for a file extension.
    """
    if not url or url == '-':
        return '无'

    # Drop the query string and fragment.
    path = url.split('?', 1)[0].split('#', 1)[0]

    # Drop "scheme://host" from absolute URLs; a bare host is the root path.
    if '://' in path:
        remainder = path.split('://', 1)[1]
        slash = remainder.find('/')
        path = remainder[slash:] if slash != -1 else '/'

    last_segment = path.rsplit('/', 1)[-1]
    if '.' in last_segment:
        ext = last_segment.rsplit('.', 1)[1].lower()
        if ext:
            if len(ext) <= 6:  # plausible extension length
                return _EXTENSION_ALIASES.get(ext, ext)
            return '未知'

    if path.endswith('/'):
        return '目录'
    if '/api/' in path or path.startswith('/api'):
        return 'API'
    return '无'
def extract_hour_from_time(time_str):
    """Return the hour from a log timestamp such as "8/Jan/2025:20:16:54 +0800".

    The hour is the text between the first and second ':'. Returns 0 when the
    value is not a string, has no ':' or the hour is not an integer.
    """
    try:
        time_parts = time_str.split(':')
        if len(time_parts) >= 2:
            return int(time_parts[1])
    except (AttributeError, TypeError, ValueError):
        # Not a string, or the hour field is not numeric: fall through to the
        # default. Only these errors are swallowed so that KeyboardInterrupt
        # and SystemExit still propagate.
        pass
    return 0
def _iter_log_lines(filepath):
    """Yield the text lines of one uploaded file, decompressing .gz and .zip.

    For a zip archive, every member ending in .log or .txt is read in
    archive order. Suffixes are matched case-insensitively. Text is decoded
    as UTF-8 with undecodable bytes dropped.
    """
    import io  # local import: only needed for zip members

    lowered = filepath.lower()
    if lowered.endswith('.gz'):
        with gzip.open(filepath, 'rt', encoding='utf-8', errors='ignore') as f:
            yield from f
    elif lowered.endswith('.zip'):
        with zipfile.ZipFile(filepath, 'r') as zf:
            for member in zf.namelist():
                if not member.lower().endswith(('.log', '.txt')):
                    continue
                with zf.open(member, 'r') as raw:
                    yield from io.TextIOWrapper(raw, encoding='utf-8', errors='ignore')
    else:
        with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
            yield from f


def analyze_log_files(filepaths):
    """Parse every line of the given log files and return the analysis report.

    Args:
        filepaths: paths of plain (.log/.txt), gzip (.gz) or zip (.zip) files.

    Returns:
        The dict produced by perform_analysis(), or {"error": ...} when no
        line of any file could be parsed.

    A file that fails to open or decompress is logged and skipped; lines read
    from it before the failure are kept. Lines are streamed instead of being
    loaded per file, and all .log/.txt members of a zip archive are read
    (previously only the first one was).
    """
    all_logs = []
    total_lines = 0
    skipped_lines = 0

    for filepath in filepaths:
        try:
            for line in _iter_log_lines(filepath):
                total_lines += 1
                log_data = parse_cdn_log_line(line)
                if not log_data:
                    skipped_lines += 1
                    continue
                # Derived fields used by the report.
                log_data['browser'] = detect_browser_from_ua(log_data.get('user_agent', ''))
                log_data['file_type'] = extract_file_extension(log_data.get('url', ''))
                log_data['hour'] = extract_hour_from_time(log_data.get('time', ''))
                all_logs.append(log_data)
        except Exception as e:
            logger.error(f"处理文件 {filepath} 时出错: {str(e)}")
            continue

    if not all_logs:
        return {"error": "没有找到有效日志数据"}
    return perform_analysis(all_logs, total_lines, skipped_lines)
def _percentage(count, total):
    """Return count as a percentage of total, rounded to 2 decimals (0 if total is 0)."""
    return round((count / total) * 100, 2) if total else 0


def _ranked_stats(pairs, total, label_key):
    """Turn (label, count) pairs into report rows keyed by *label_key*."""
    return [
        {label_key: label, 'count': count, 'percentage': _percentage(count, total)}
        for label, count in pairs
    ]


def _decimal_values(logs, key):
    """Collect log[key] as ints, skipping values that are not decimal digit strings."""
    values = []
    for log in logs:
        raw = log.get(key, '')
        # isdecimal() (not isdigit()) because isdigit() also accepts characters
        # such as superscript digits, which int() rejects.
        if isinstance(raw, str) and raw.isdecimal():
            values.append(int(raw))
    return values


def perform_analysis(logs, total_lines, skipped_lines):
    """Aggregate parsed log records into the report returned to the front end.

    Args:
        logs: list of dicts produced by the line parser, each extended with
            'browser', 'file_type' and 'hour'.
        total_lines: number of lines read from all files.
        skipped_lines: number of lines that could not be parsed.

    Returns:
        A dict with the keys 'summary', 'status_distribution',
        'browser_distribution', 'ip_distribution', 'resource_distribution',
        'cache_distribution', 'hourly_distribution', 'filetype_distribution',
        'method_distribution', 'referer_distribution' and
        'response_time_stats'.

    Side effect: the report is also written as JSON to
    app.config['ANALYSIS_FOLDER'] under the name analysis_<timestamp>.json.
    """
    total = len(logs)

    # 1. Status codes, most frequent first.
    status_stats = [
        {
            'status': status,
            'description': HTTP_STATUS_CODES.get(status, '未知'),
            'count': count,
            'percentage': _percentage(count, total),
        }
        for status, count in Counter(log['status'] for log in logs).most_common()
    ]

    # 2. Browsers, most frequent first.
    browser_stats = _ranked_stats(
        Counter(log['browser'] for log in logs).most_common(), total, 'browser')

    # 3. Client IPs: only the top 20 rows are reported; the number of distinct
    #    IPs comes from the counter itself.
    ip_counter = Counter(log['client_ip'] for log in logs)
    ip_stats = _ranked_stats(ip_counter.most_common(20), total, 'ip')

    # 4. Most requested URLs (top 20); long URLs get a shortened display form.
    resource_stats = [
        {
            'resource': url if len(url) <= 60 else url[:57] + '...',
            'full_url': url,
            'count': count,
            'percentage': _percentage(count, total),
        }
        for url, count in Counter(log['url'] for log in logs).most_common(20)
    ]

    # 5. Cache status, in order of first appearance.
    cache_descriptions = {
        'HIT': '缓存命中',
        'MISS': '缓存未命中',
        'EXPIRED': '缓存过期',
        'REVALIDATED': '重新验证',
        'BYPASS': '绕过缓存',
        'Unknown': '未知状态',
    }
    hit_counter = Counter(log.get('hit_miss', 'Unknown') for log in logs)
    total_cache = sum(hit_counter.values())
    cache_stats = [
        {
            'status': hit_status,
            'description': cache_descriptions.get(hit_status, '未缓存或无缓存信息'),
            'count': count,
            'percentage': _percentage(count, total_cache),
        }
        for hit_status, count in hit_counter.items()
    ]

    # 6. Requests per hour of day; every hour 0-23 gets a row.
    hour_counter = Counter(log['hour'] for log in logs)
    hourly_stats = [
        {
            'hour': hour,
            'hour_display': f"{hour:02d}:00",
            'count': hour_counter.get(hour, 0),
            'percentage': _percentage(hour_counter.get(hour, 0), total),
        }
        for hour in range(24)
    ]

    # 7. File types, most frequent first.
    filetype_stats = _ranked_stats(
        Counter(log['file_type'] for log in logs).most_common(), total, 'file_type')

    # 8. Request methods, in order of first appearance; empty methods are skipped.
    method_counter = Counter(log.get('method', '') for log in logs)
    method_stats = _ranked_stats(
        ((method, count) for method, count in method_counter.items() if method),
        total, 'method')

    # 9. Top 10 referers; '-' is shown as direct access.
    referer_counter = Counter(log.get('referer', '-') for log in logs)
    referer_stats = _ranked_stats(
        ((referer if referer != '-' else '直接访问', count)
         for referer, count in referer_counter.most_common(10)),
        total, 'referer')

    # 10. Response time and transferred bytes (numeric values only).
    response_times = _decimal_values(logs, 'response_time')
    avg_response_time = sum(response_times) / len(response_times) if response_times else 0
    total_bandwidth = sum(_decimal_values(logs, 'response_size'))

    now = datetime.now()
    analysis_result = {
        'summary': {
            'total_lines': total_lines,
            'valid_logs': total,
            'skipped_lines': skipped_lines,
            'success_rate': round((total / total_lines * 100), 2) if total_lines > 0 else 0,
            'analysis_time': now.strftime("%Y-%m-%d %H:%M:%S"),
            'avg_response_time': round(avg_response_time, 2),
            'total_bandwidth': total_bandwidth,
        },
        'status_distribution': status_stats,
        'browser_distribution': browser_stats,
        'ip_distribution': {
            'total_unique': len(ip_counter),  # number of distinct client IPs
            'top_20': ip_stats,
        },
        'resource_distribution': resource_stats,
        'cache_distribution': cache_stats,
        'hourly_distribution': hourly_stats,
        'filetype_distribution': filetype_stats,
        'method_distribution': method_stats,
        'referer_distribution': referer_stats,
        'response_time_stats': {
            'avg': round(avg_response_time, 2),
            'max': max(response_times) if response_times else 0,
            'min': min(response_times) if response_times else 0,
            'total_requests': len(response_times),
        },
    }

    # Persist the report next to the application.
    timestamp = now.strftime("%Y%m%d_%H%M%S")
    result_file = os.path.join(app.config['ANALYSIS_FOLDER'], f'analysis_{timestamp}.json')
    with open(result_file, 'w', encoding='utf-8') as f:
        json.dump(analysis_result, f, ensure_ascii=False, indent=2)

    return analysis_result
@app.route('/')
def index():
    """Render the single-page upload and report UI."""
    template_name = 'index.html'
    return render_template(template_name)
@app.route('/upload', methods=['POST'])
def upload_files():
    """Save the uploaded log files, analyze them and return the report as JSON.

    Responses:
        200 with the analysis report;
        400 with {'error': ...} when no file was sent, no file has an allowed
            extension, or the files contain no parseable log line;
        500 with {'error': ...} when the analysis raises.
    """
    if 'files' not in request.files:
        return jsonify({'error': '没有选择文件'}), 400

    files = request.files.getlist('files')
    if not files or files[0].filename == '':
        return jsonify({'error': '没有选择文件'}), 400

    upload_folder = app.config['UPLOAD_FOLDER']
    saved_files = []
    for position, file in enumerate(files):
        if not (file and allowed_file(file.filename)):
            continue

        extension = file.filename.rsplit('.', 1)[1].lower()
        filename = secure_filename(file.filename)
        # secure_filename() drops non-ASCII characters, so a name such as
        # "日志.gz" collapses to "gz" and loses the extension the analyzer
        # uses to pick a decompressor. Fall back to a generated name.
        if not filename.lower().endswith('.' + extension):
            filename = f'upload_{position}.{extension}'

        filepath = os.path.join(upload_folder, filename)
        if filepath in saved_files:
            # Two uploads with the same name in one request: keep both.
            stem, suffix = os.path.splitext(filename)
            filepath = os.path.join(upload_folder, f'{stem}_{position}{suffix}')

        file.save(filepath)
        saved_files.append(filepath)

    if not saved_files:
        return jsonify({'error': '没有有效的日志文件'}), 400

    try:
        analysis_results = analyze_log_files(saved_files)
        if 'error' in analysis_results:
            # The analyzer reports "no valid data" as a dict, not an exception;
            # send it with an error status so the client does not render it.
            return jsonify(analysis_results), 400
        return jsonify(analysis_results)
    except Exception as e:
        logger.error(f"分析日志时出错: {str(e)}")
        return jsonify({'error': f'分析失败: {str(e)}'}), 500
@app.route('/download/<filename>')
def download_file(filename):
    """Send the named file from the analysis results folder."""
    results_dir = app.config['ANALYSIS_FOLDER']
    return send_from_directory(results_dir, filename)
@app.route('/clear', methods=['POST'])
def clear_files():
    """Delete every regular file in the upload and analysis result folders.

    A file that cannot be deleted is logged and skipped. Returns
    {'success': True}, or {'error': ...} with status 500 if listing fails.
    """
    try:
        targets = [app.config['UPLOAD_FOLDER'], app.config['ANALYSIS_FOLDER']]
        for directory in targets:
            if not os.path.exists(directory):
                continue
            for entry in os.listdir(directory):
                file_path = os.path.join(directory, entry)
                try:
                    if os.path.isfile(file_path):
                        os.unlink(file_path)
                except Exception as e:
                    logger.error(f"删除文件 {file_path} 时出错: {e}")
        return jsonify({'success': True})
    except Exception as e:
        return jsonify({'error': str(e)}), 500
def open_browser():
    """Open the local UI in the default browser after a short delay."""
    startup_delay_seconds = 2  # give the Flask server time to start
    time.sleep(startup_delay_seconds)
    webbrowser.open('http://127.0.0.1:5000')
if __name__ == '__main__':
    # Open the browser from a daemon thread so it does not block the server.
    browser_thread = threading.Thread(target=open_browser, daemon=True)
    browser_thread.start()

    # Run the Flask development server on the loopback interface.
    try:
        app.run(debug=False, host='127.0.0.1', port=5000)
    except Exception as e:
        logger.error(f"启动应用失败: {str(e)}")
        # NOTE(review): the source lost its indentation; the prompt is assumed
        # to belong to the failure path so the console stays open — confirm.
        input("按任意键退出...")
index.html
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>阿里云CDN日志分析系统</title>
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
<script src="https://cdn.jsdelivr.net/npm/chartjs-plugin-datalabels@2.0.0"></script>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
<style>
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
min-height: 100vh;
padding: 20px;
color: #333;
}
.container {
max-width: 1600px;
margin: 0 auto;
background: white;
border-radius: 20px;
box-shadow: 0 20px 60px rgba(0,0,0,0.3);
overflow: hidden;
}
.header {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 30px 40px;
text-align: center;
}
.header h1 {
font-size: 2.5em;
margin-bottom: 10px;
display: flex;
align-items: center;
justify-content: center;
gap: 15px;
}
.upload-section {
padding: 40px;
background: #f8f9fa;
border-bottom: 1px solid #e9ecef;
}
.upload-box {
border: 3px dashed #667eea;
border-radius: 15px;
padding: 50px 30px;
text-align: center;
background: white;
cursor: pointer;
transition: all 0.3s ease;
}
.upload-box:hover {
background: #f0f2ff;
border-color: #764ba2;
transform: translateY(-2px);
}
.upload-box i {
font-size: 60px;
color: #667eea;
margin-bottom: 20px;
}
.upload-box h3 {
color: #333;
margin-bottom: 15px;
font-size: 1.4em;
}
.file-input {
display: none;
}
.file-list {
margin-top: 25px;
max-height: 250px;
overflow-y: auto;
background: white;
border-radius: 10px;
padding: 15px;
border: 1px solid #e9ecef;
}
.file-item {
background: #f8f9fa;
padding: 12px 15px;
margin-bottom: 8px;
border-radius: 8px;
border-left: 4px solid #667eea;
display: flex;
justify-content: space-between;
align-items: center;
}
.buttons {
margin-top: 25px;
display: flex;
gap: 15px;
justify-content: center;
}
.btn {
padding: 14px 35px;
border: none;
border-radius: 10px;
font-size: 16px;
font-weight: 600;
cursor: pointer;
transition: all 0.3s ease;
display: flex;
align-items: center;
gap: 10px;
min-width: 160px;
justify-content: center;
}
.btn-primary {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
}
.btn-success {
background: linear-gradient(135deg, #00b09b 0%, #96c93d 100%);
color: white;
}
.btn-danger {
background: linear-gradient(135deg, #ff416c 0%, #ff4b2b 100%);
color: white;
}
.btn:hover {
transform: translateY(-3px);
box-shadow: 0 7px 20px rgba(0,0,0,0.15);
}
.btn:disabled {
opacity: 0.5;
cursor: not-allowed;
transform: none;
}
.analysis-section {
padding: 30px;
}
.summary-cards {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));
gap: 25px;
margin-bottom: 40px;
}
.card {
background: white;
border-radius: 15px;
padding: 25px;
box-shadow: 0 5px 20px rgba(0,0,0,0.08);
border-top: 5px solid;
transition: transform 0.3s ease;
}
.card:hover {
transform: translateY(-8px);
}
.card h3 {
color: #333;
margin-bottom: 15px;
font-size: 1.1em;
display: flex;
align-items: center;
gap: 10px;
}
.card .value {
font-size: 2.5em;
font-weight: 800;
margin-bottom: 10px;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
background-clip: text;
}
.dashboard-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(500px, 1fr));
gap: 30px;
margin-bottom: 40px;
}
.dashboard-card {
background: white;
border-radius: 15px;
padding: 25px;
box-shadow: 0 5px 20px rgba(0,0,0,0.08);
transition: transform 0.3s ease;
}
.dashboard-card:hover {
transform: translateY(-5px);
}
.dashboard-card h3 {
color: #333;
margin-bottom: 20px;
font-size: 1.3em;
display: flex;
align-items: center;
gap: 12px;
padding-bottom: 15px;
border-bottom: 2px solid #f0f2ff;
}
table {
width: 100%;
border-collapse: collapse;
margin-top: 10px;
}
th {
background: #f8f9fa;
color: #495057;
font-weight: 600;
padding: 16px 12px;
text-align: left;
border-bottom: 2px solid #e9ecef;
font-size: 0.95em;
}
td {
padding: 14px 12px;
border-bottom: 1px solid #e9ecef;
color: #495057;
font-size: 0.95em;
}
tr:hover {
background: #f8f9fa;
}
.percentage-bar {
background: #e9ecef;
height: 10px;
border-radius: 5px;
margin-top: 8px;
overflow: hidden;
width: 100%;
}
.percentage-fill {
height: 100%;
background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
border-radius: 5px;
transition: width 0.5s ease;
}
.status-badge {
display: inline-block;
padding: 4px 12px;
border-radius: 20px;
font-size: 0.85em;
font-weight: 600;
text-align: center;
min-width: 60px;
}
.status-2xx { background: #d4edda; color: #155724; }
.status-3xx { background: #d1ecf1; color: #0c5460; }
.status-4xx { background: #fff3cd; color: #856404; }
.status-5xx { background: #f8d7da; color: #721c24; }
.charts-container {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(450px, 1fr));
gap: 30px;
margin-bottom: 40px;
}
.chart-box {
background: white;
border-radius: 15px;
padding: 25px;
box-shadow: 0 5px 20px rgba(0,0,0,0.08);
}
.chart-box h3 {
color: #333;
margin-bottom: 20px;
font-size: 1.2em;
display: flex;
align-items: center;
gap: 10px;
}
.chart-container {
position: relative;
height: 300px;
width: 100%;
}
.loading {
display: none;
text-align: center;
padding: 60px;
background: rgba(255,255,255,0.95);
border-radius: 15px;
position: fixed;
top: 50%;
left: 50%;
transform: translate(-50%, -50%);
z-index: 1000;
box-shadow: 0 10px 40px rgba(0,0,0,0.1);
}
.error {
background: linear-gradient(135deg, #ff416c 0%, #ff4b2b 100%);
color: white;
padding: 20px;
border-radius: 10px;
margin: 20px 0;
display: none;
align-items: center;
gap: 15px;
}
.success {
background: linear-gradient(135deg, #00b09b 0%, #96c93d 100%);
color: white;
padding: 20px;
border-radius: 10px;
margin: 20px 0;
display: none;
align-items: center;
gap: 15px;
}
.footer {
text-align: center;
padding: 25px;
color: #666;
border-top: 1px solid #e9ecef;
background: #f8f9fa;
}
.cache-hit { color: #28a745; font-weight: bold; }
.cache-miss { color: #dc3545; font-weight: bold; }
@media (max-width: 768px) {
.dashboard-grid,
.charts-container {
grid-template-columns: 1fr;
}
.dashboard-card {
min-width: 100%;
}
}
.tooltip {
position: relative;
cursor: help;
}
.tooltip-text {
visibility: hidden;
width: 300px;
background-color: #333;
color: white;
text-align: left;
border-radius: 6px;
padding: 12px;
position: absolute;
z-index: 1;
bottom: 125%;
left: 50%;
transform: translateX(-50%);
opacity: 0;
transition: opacity 0.3s;
font-size: 0.9em;
line-height: 1.4;
}
.tooltip:hover .tooltip-text {
visibility: visible;
opacity: 1;
}
</style>
</head>
<body>
<div class="container">
<div class="header">
<h1><i class="fas fa-cloud"></i> 阿里云CDN日志分析系统</h1>
<p>上传阿里云CDN离线日志文件,系统将自动分析并展示专业统计报告</p>
</div>
<div class="upload-section">
<div class="upload-box" id="dropZone" onclick="document.getElementById('fileInput').click()">
<i class="fas fa-cloud-upload-alt"></i>
<h3>点击选择或拖放日志文件</h3>
<p>支持阿里云CDN标准日志格式 (.log, .txt, .gz, .zip)</p>
<input type="file" id="fileInput" class="file-input" multiple accept=".log,.txt,.gz,.zip">
</div>
<div class="file-list" id="fileList"></div>
<div class="buttons">
<button class="btn btn-primary" onclick="document.getElementById('fileInput').click()">
<i class="fas fa-folder-open"></i> 选择文件
</button>
<button class="btn btn-success" id="analyzeBtn" onclick="analyzeLogs()" disabled>
<i class="fas fa-chart-bar"></i> 开始分析
</button>
<button class="btn btn-danger" onclick="clearAll()">
<i class="fas fa-trash-alt"></i> 清除所有
</button>
</div>
<div class="loading" id="loading">
<i class="fas fa-spinner fa-spin"></i>
<h3>正在分析日志文件...</h3>
<p>请稍候,系统正在处理和分析数据</p>
</div>
<div class="error" id="errorMessage" style="display: none;">
<i class="fas fa-exclamation-triangle" style="font-size: 24px;"></i>
<span id="errorText"></span>
</div>
<div class="success" id="successMessage" style="display: none;">
<i class="fas fa-check-circle" style="font-size: 24px;"></i>
<span id="successText"></span>
</div>
</div>
<div class="analysis-section" id="analysisSection" style="display: none;">
<div class="summary-cards" id="summaryCards"></div>
<div class="dashboard-grid">
<!-- 响应码分布 -->
<div class="dashboard-card">
<h3><i class="fas fa-exchange-alt"></i> 响应码分布</h3>
<table id="statusTable">
<thead>
<tr>
<th>状态码</th>
<th>说明</th>
<th>数量</th>
<th>占比</th>
</tr>
</thead>
<tbody id="statusTableBody"></tbody>
</table>
</div>
<!-- 浏览器分布 -->
<div class="dashboard-card">
<h3><i class="fas fa-globe"></i> 浏览器分布</h3>
<table id="browserTable">
<thead>
<tr>
<th>浏览器</th>
<th>数量</th>
<th>占比</th>
</tr>
</thead>
<tbody id="browserTableBody"></tbody>
</table>
</div>
</div>
<div class="dashboard-grid">
<!-- 热门访问IP -->
<div class="dashboard-card">
<h3><i class="fas fa-map-marker-alt"></i> 热门访问IP</h3>
<table id="ipTable">
<thead>
<tr>
<th>IP地址</th>
<th>访问次数</th>
<th>占比</th>
</tr>
</thead>
<tbody id="ipTableBody"></tbody>
</table>
</div>
<!-- 热门访问资源 -->
<div class="dashboard-card">
<h3><i class="fas fa-file-alt"></i> 热门访问资源</h3>
<table id="resourceTable">
<thead>
<tr>
<th>资源路径</th>
<th>访问次数</th>
<th>占比</th>
</tr>
</thead>
<tbody id="resourceTableBody"></tbody>
</table>
</div>
</div>
<div class="dashboard-grid">
<!-- 缓存状态 -->
<div class="dashboard-card">
<h3><i class="fas fa-bolt"></i> 缓存状态</h3>
<table id="cacheTable">
<thead>
<tr>
<th>缓存状态</th>
<th>说明</th>
<th>数量</th>
<th>占比</th>
</tr>
</thead>
<tbody id="cacheTableBody"></tbody>
</table>
</div>
<!-- 文件类型分布 -->
<div class="dashboard-card">
<h3><i class="fas fa-folder"></i> 文件类型分布</h3>
<table id="filetypeTable">
<thead>
<tr>
<th>文件类型</th>
<th>数量</th>
<th>占比</th>
</tr>
</thead>
<tbody id="filetypeTableBody"></tbody>
</table>
</div>
</div>
<div class="charts-container">
<div class="chart-box">
<h3><i class="fas fa-chart-pie"></i> 响应码分类分布</h3>
<div class="chart-container">
<canvas id="statusChart"></canvas>
</div>
</div>
<div class="chart-box">
<h3><i class="fas fa-chart-bar"></i> 24小时访问分布</h3>
<div class="chart-container">
<canvas id="hourlyChart"></canvas>
</div>
</div>
</div>
<div class="dashboard-card" style="margin-top: 30px;">
<h3><i class="fas fa-info-circle"></i> 分析报告摘要</h3>
<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 20px; padding: 15px;">
<div id="methodSummary"></div>
<div id="refererSummary"></div>
<div id="responseTimeSummary"></div>
</div>
</div>
</div>
<div class="footer">
<p>阿里云CDN日志分析系统 | 基于阿里云CDN离线日志格式分析</p>
</div>
</div>
<script>
// Page state shared by the functions below.
let selectedFiles = [];   // files currently queued for upload
let analysisData = null;  // last report returned by the server
let charts = {};          // chart instances, kept so they can be destroyed

// File picker: hand the chosen files to handleFiles().
document.getElementById('fileInput').addEventListener('change', function (event) {
    handleFiles(event.target.files);
});

// Drag and drop: highlight the drop zone while dragging over it, restore it
// on leave/drop, and treat dropped files like picked files.
const dropZone = document.getElementById('dropZone');

dropZone.addEventListener('dragover', function (event) {
    event.preventDefault();
    dropZone.style.background = '#f0f2ff';
});

dropZone.addEventListener('dragleave', function (event) {
    event.preventDefault();
    dropZone.style.background = 'white';
});

dropZone.addEventListener('drop', function (event) {
    event.preventDefault();
    dropZone.style.background = 'white';
    handleFiles(event.dataTransfer.files);
});
/**
 * Replace the current selection with the given files and refresh the UI.
 * @param {FileList|File[]} files
 */
function handleFiles(files) {
    selectedFiles = Array.from(files);
    updateFileList();

    const analyzeButton = document.getElementById('analyzeBtn');
    analyzeButton.disabled = selectedFiles.length === 0;

    showSuccess(`已选择 ${selectedFiles.length} 个文件`);
}
/**
 * Render the list of selected files, each with a remove button.
 *
 * Rows are built with DOM APIs and the file name is set through textContent,
 * so a file name containing markup is shown as text instead of being parsed
 * as HTML.
 */
function updateFileList() {
    const fileList = document.getElementById('fileList');
    fileList.innerHTML = '';

    if (selectedFiles.length === 0) {
        fileList.innerHTML = '<div style="text-align: center; padding: 20px; color: #999;">暂无文件</div>';
        return;
    }

    selectedFiles.forEach((file, index) => {
        const item = document.createElement('div');
        item.className = 'file-item';

        // Left side: icon + file name.
        const info = document.createElement('div');
        info.style.cssText = 'display: flex; align-items: center; gap: 10px;';
        const icon = document.createElement('i');
        icon.className = 'fas fa-file-alt';
        icon.style.color = '#667eea';
        const name = document.createElement('span');
        name.textContent = file.name;
        info.appendChild(icon);
        info.appendChild(name);

        // Right side: remove button for this entry.
        const removeButton = document.createElement('button');
        removeButton.type = 'button';
        removeButton.style.cssText = 'background:none;border:none;color:#dc3545;cursor:pointer;';
        removeButton.addEventListener('click', () => removeFile(index));
        const removeIcon = document.createElement('i');
        removeIcon.className = 'fas fa-times';
        removeButton.appendChild(removeIcon);

        item.appendChild(info);
        item.appendChild(removeButton);
        fileList.appendChild(item);
    });
}
/**
 * Remove the file at the given position from the selection and refresh the UI.
 * @param {number} index position in selectedFiles
 */
function removeFile(index) {
    selectedFiles.splice(index, 1);
    updateFileList();

    const analyzeButton = document.getElementById('analyzeBtn');
    analyzeButton.disabled = selectedFiles.length === 0;

    showSuccess(`已移除文件,剩余 ${selectedFiles.length} 个文件`);
}
/**
 * Upload the selected files to /upload and render the returned report.
 *
 * Errors are shown through showError(). Two cases are handled explicitly:
 * a response whose body is not JSON (for example an HTML error page for an
 * oversized upload), and a JSON body that carries an `error` field even
 * though the HTTP status is successful.
 */
async function analyzeLogs() {
    if (selectedFiles.length === 0) {
        showError('请先选择要分析的日志文件');
        return;
    }

    showLoading(true);
    hideMessages();

    const formData = new FormData();
    selectedFiles.forEach(file => {
        formData.append('files', file);
    });

    try {
        const response = await fetch('/upload', {
            method: 'POST',
            body: formData
        });

        let data;
        try {
            data = await response.json();
        } catch (parseError) {
            // The body was not JSON; report the status instead of a parser message.
            throw new Error(`服务器返回了无法解析的响应 (HTTP ${response.status})`);
        }

        if (!response.ok || (data && data.error)) {
            throw new Error((data && data.error) || '分析失败');
        }

        analysisData = data;
        displayAnalysisResults();
        showSuccess('日志分析完成!');
    } catch (error) {
        showError(`分析失败: ${error.message}`);
    } finally {
        showLoading(false);
    }
}
/**
 * Reveal the report section, fill every card, table and chart from
 * analysisData, then scroll the section into view.
 */
function displayAnalysisResults() {
    const section = document.getElementById('analysisSection');
    section.style.display = 'block';

    // Summary first, then tables, charts and the textual summary.
    updateSummaryCards();
    updateStatusTable();
    updateBrowserTable();
    updateIpTable();
    updateResourceTable();
    updateCacheTable();
    updateFiletypeTable();
    updateCharts();
    updateAnalysisSummary();

    document.getElementById('analysisSection').scrollIntoView({ behavior: 'smooth' });
}
/**
 * Render the four summary cards (request count, distinct client IPs,
 * average response time, total response bytes) from analysisData.
 * Only numeric report fields are interpolated into the markup.
 */
function updateSummaryCards() {
const summary = analysisData.summary;
// Number of distinct client IPs as reported by the server.
const uniqueIps = analysisData.ip_distribution.total_unique;
const html = `
<div class="card">
<h3><i class="fas fa-file-alt"></i> 总请求数</h3>
<div class="value">${summary.valid_logs.toLocaleString()}</div>
<div class="label">解析成功率: ${summary.success_rate}%</div>
</div>
<div class="card">
<h3><i class="fas fa-users"></i> 独立IP数</h3>
<div class="value">${uniqueIps.toLocaleString()}</div>
<div class="label">不同客户端IP数量</div>
</div>
<div class="card">
<h3><i class="fas fa-tachometer-alt"></i> 平均响应时间</h3>
<div class="value">${summary.avg_response_time}ms</div>
<div class="label">请求平均处理时间</div>
</div>
<div class="card">
<h3><i class="fas fa-database"></i> 总流量</h3>
<div class="value">${formatBytes(summary.total_bandwidth)}</div>
<div class="label">响应数据总量</div>
</div>
`;
// Replace the previous cards in one assignment.
document.getElementById('summaryCards').innerHTML = html;
}
/**
 * Format a byte count using binary units (1 KB = 1024 Bytes).
 *
 * Values of 1 TB and above previously indexed past the unit list and printed
 * "undefined"; the list now goes up to PB and larger values stay in PB.
 * Non-finite, zero and negative inputs are shown as '0 Bytes'.
 *
 * @param {number} bytes
 * @returns {string} e.g. '1.5 KB'
 */
function formatBytes(bytes) {
    if (!Number.isFinite(bytes) || bytes <= 0) return '0 Bytes';

    const k = 1024;
    const sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB', 'PB'];

    // Divide step by step instead of using logarithms, which can land just
    // below an exact power of 1024 because of floating-point rounding.
    let value = bytes;
    let unitIndex = 0;
    while (value >= k && unitIndex < sizes.length - 1) {
        value /= k;
        unitIndex += 1;
    }
    return parseFloat(value.toFixed(2)) + ' ' + sizes[unitIndex];
}
/**
 * Fill the status-code table from analysisData.status_distribution.
 *
 * Cells are created with DOM APIs and filled through textContent, so values
 * that originate from the uploaded log are never parsed as HTML.
 */
function updateStatusTable() {
    const tbody = document.getElementById('statusTableBody');
    tbody.innerHTML = '';

    analysisData.status_distribution.forEach(item => {
        // Badge colour follows the status class (2xx/3xx/4xx/5xx).
        let statusClass = '';
        if (item.status.startsWith('2')) statusClass = 'status-2xx';
        else if (item.status.startsWith('3')) statusClass = 'status-3xx';
        else if (item.status.startsWith('4')) statusClass = 'status-4xx';
        else if (item.status.startsWith('5')) statusClass = 'status-5xx';

        const row = tbody.insertRow();

        const badge = document.createElement('span');
        badge.className = `status-badge ${statusClass}`;
        badge.textContent = item.status;
        row.insertCell().appendChild(badge);

        row.insertCell().textContent = item.description;
        row.insertCell().textContent = item.count.toLocaleString();

        // Percentage label plus a bar scaled x2 and capped at 100%.
        const shareCell = row.insertCell();
        const label = document.createElement('div');
        label.textContent = `${item.percentage}%`;
        const bar = document.createElement('div');
        bar.className = 'percentage-bar';
        const fill = document.createElement('div');
        fill.className = 'percentage-fill';
        fill.style.width = `${Math.min(item.percentage * 2, 100)}%`;
        bar.appendChild(fill);
        shareCell.appendChild(label);
        shareCell.appendChild(bar);
    });
}
/**
 * Fill the browser table from analysisData.browser_distribution.
 *
 * Cells are created with DOM APIs and filled through textContent rather
 * than by interpolating report values into an HTML string.
 */
function updateBrowserTable() {
    const tbody = document.getElementById('browserTableBody');
    tbody.innerHTML = '';

    analysisData.browser_distribution.forEach(item => {
        const row = tbody.insertRow();
        row.insertCell().textContent = item.browser;
        row.insertCell().textContent = item.count.toLocaleString();

        // Percentage label plus a bar scaled x2 and capped at 100%.
        const shareCell = row.insertCell();
        const label = document.createElement('div');
        label.textContent = `${item.percentage}%`;
        const bar = document.createElement('div');
        bar.className = 'percentage-bar';
        const fill = document.createElement('div');
        fill.className = 'percentage-fill';
        fill.style.width = `${Math.min(item.percentage * 2, 100)}%`;
        bar.appendChild(fill);
        shareCell.appendChild(label);
        shareCell.appendChild(bar);
    });
}
/**
 * Fill the client-IP table with the first 10 entries of
 * analysisData.ip_distribution.top_20.
 *
 * Cells are created with DOM APIs and filled through textContent, so values
 * read from the uploaded log are never parsed as HTML.
 */
function updateIpTable() {
    const tbody = document.getElementById('ipTableBody');
    tbody.innerHTML = '';

    const topIps = analysisData.ip_distribution.top_20.slice(0, 10);
    topIps.forEach(item => {
        const row = tbody.insertRow();
        row.insertCell().textContent = item.ip;
        row.insertCell().textContent = item.count.toLocaleString();

        // Percentage label plus a bar scaled x20 (per-IP shares are small),
        // capped at 100%.
        const shareCell = row.insertCell();
        const label = document.createElement('div');
        label.textContent = `${item.percentage}%`;
        const bar = document.createElement('div');
        bar.className = 'percentage-bar';
        const fill = document.createElement('div');
        fill.className = 'percentage-fill';
        fill.style.width = `${Math.min(item.percentage * 20, 100)}%`;
        bar.appendChild(fill);
        shareCell.appendChild(label);
        shareCell.appendChild(bar);
    });
}
/**
 * Fill the resource table with the first 10 entries of
 * analysisData.resource_distribution.
 *
 * Request URLs come straight from the uploaded log and are chosen by whoever
 * made the request, so they are written with textContent / the title
 * property instead of being interpolated into an HTML string.
 */
function updateResourceTable() {
    const tbody = document.getElementById('resourceTableBody');
    tbody.innerHTML = '';

    const topResources = analysisData.resource_distribution.slice(0, 10);
    topResources.forEach(item => {
        const row = tbody.insertRow();

        // Shortened URL with the full URL as native title and hover tooltip.
        const resourceCell = row.insertCell();
        resourceCell.className = 'tooltip';
        resourceCell.title = item.full_url;
        resourceCell.appendChild(document.createTextNode(item.resource));
        const tooltip = document.createElement('span');
        tooltip.className = 'tooltip-text';
        tooltip.textContent = `完整路径: ${item.full_url}`;
        resourceCell.appendChild(tooltip);

        row.insertCell().textContent = item.count.toLocaleString();

        // Percentage label plus a bar scaled x2 and capped at 100%.
        const shareCell = row.insertCell();
        const label = document.createElement('div');
        label.textContent = `${item.percentage}%`;
        const bar = document.createElement('div');
        bar.className = 'percentage-bar';
        const fill = document.createElement('div');
        fill.className = 'percentage-fill';
        fill.style.width = `${Math.min(item.percentage * 2, 100)}%`;
        bar.appendChild(fill);
        shareCell.appendChild(label);
        shareCell.appendChild(bar);
    });
}
/**
 * Fill the cache-status table from analysisData.cache_distribution.
 *
 * Cells are created with DOM APIs and filled through textContent, so values
 * read from the uploaded log are never parsed as HTML.
 */
function updateCacheTable() {
    const tbody = document.getElementById('cacheTableBody');
    tbody.innerHTML = '';

    analysisData.cache_distribution.forEach(item => {
        const row = tbody.insertRow();

        // 'HIT' is styled as a hit; every other status is styled as a miss.
        const statusLabel = document.createElement('span');
        statusLabel.className = item.status === 'HIT' ? 'cache-hit' : 'cache-miss';
        statusLabel.textContent = item.status;
        row.insertCell().appendChild(statusLabel);

        row.insertCell().textContent = item.description;
        row.insertCell().textContent = item.count.toLocaleString();

        // Percentage label plus a bar scaled x2 and capped at 100%.
        const shareCell = row.insertCell();
        const label = document.createElement('div');
        label.textContent = `${item.percentage}%`;
        const bar = document.createElement('div');
        bar.className = 'percentage-bar';
        const fill = document.createElement('div');
        fill.className = 'percentage-fill';
        fill.style.width = `${Math.min(item.percentage * 2, 100)}%`;
        bar.appendChild(fill);
        shareCell.appendChild(label);
        shareCell.appendChild(bar);
    });
}
/**
 * Rebuild the file type table.
 *
 * Clears #filetypeTableBody and appends one row per entry of
 * `analysisData.filetype_distribution`, showing the file type, the request
 * count and the percentage with a proportional bar.
 */
function updateFiletypeTable() {
    const tableBody = document.getElementById('filetypeTableBody');
    tableBody.innerHTML = '';
    for (const entry of analysisData.filetype_distribution) {
        const tr = document.createElement('tr');
        // The bar is drawn at twice the percentage, capped at 100% width.
        tr.innerHTML = [
            '',
            `<td>${entry.file_type}</td>`,
            `<td>${entry.count.toLocaleString()}</td>`,
            '<td>',
            `<div>${entry.percentage}%</div>`,
            '<div class="percentage-bar">',
            `<div class="percentage-fill" style="width: ${Math.min(entry.percentage * 2, 100)}%"></div>`,
            '</div>',
            '</td>',
            '',
        ].join('\n');
        tableBody.appendChild(tr);
    }
}
/**
 * Redraw the two charts from the current `analysisData`.
 *
 * 1. Destroys every chart currently stored in `charts` and resets the map.
 * 2. Draws a doughnut on #statusChart with request counts grouped by the
 *    leading digit of the status code (2xx/3xx/4xx/5xx/其他).
 * 3. Draws a bar chart on #hourlyChart from `hourly_distribution`
 *    (`hour_display` as labels, `count` as values).
 */
function updateCharts() {
    // Destroy the charts left over from a previous render.
    for (const existing of Object.values(charts)) {
        if (existing) {
            existing.destroy();
        }
    }
    charts = {};

    // ---- Status code class doughnut ----
    const statusCanvasCtx = document.getElementById('statusChart').getContext('2d');

    // Tally request counts per status class; anything not starting with 2-5
    // falls into the catch-all bucket.
    const classTotals = { '2xx': 0, '3xx': 0, '4xx': 0, '5xx': 0, '其他': 0 };
    const knownLeadingDigits = ['2', '3', '4', '5'];
    for (const entry of analysisData.status_distribution) {
        const code = entry.status;
        const digit = knownLeadingDigits.find(candidate => code.startsWith(candidate));
        const bucket = digit === undefined ? '其他' : `${digit}xx`;
        classTotals[bucket] += entry.count;
    }

    // Tooltip text: "<label>: <count>次 (<share of all slices>%)".
    const describeSlice = context => {
        const sum = context.dataset.data.reduce((acc, value) => acc + value, 0);
        const share = ((context.parsed / sum) * 100).toFixed(1);
        return `${context.label}: ${context.parsed}次 (${share}%)`;
    };

    const statusConfig = {
        type: 'doughnut',
        data: {
            labels: Object.keys(classTotals),
            datasets: [{
                data: Object.values(classTotals),
                backgroundColor: ['#28a745', '#17a2b8', '#ffc107', '#dc3545', '#6c757d'],
                borderWidth: 2,
                borderColor: '#fff',
            }],
        },
        options: {
            responsive: true,
            maintainAspectRatio: false,
            plugins: {
                legend: { position: 'right' },
                tooltip: { callbacks: { label: describeSlice } },
            },
        },
    };
    charts.status = new Chart(statusCanvasCtx, statusConfig);

    // ---- 24-hour request distribution bar chart ----
    const hourlyCanvasCtx = document.getElementById('hourlyChart').getContext('2d');
    const hourLabels = [];
    const hourCounts = [];
    for (const slot of analysisData.hourly_distribution) {
        hourLabels.push(slot.hour_display);
        hourCounts.push(slot.count);
    }

    const hourlyConfig = {
        type: 'bar',
        data: {
            labels: hourLabels,
            datasets: [{
                label: '请求次数',
                data: hourCounts,
                backgroundColor: 'rgba(102, 126, 234, 0.7)',
                borderColor: 'rgba(102, 126, 234, 1)',
                borderWidth: 1,
            }],
        },
        options: {
            responsive: true,
            maintainAspectRatio: false,
            plugins: {
                legend: { display: false },
            },
            scales: {
                y: {
                    beginAtZero: true,
                    title: { display: true, text: '请求次数' },
                },
                x: {
                    title: { display: true, text: '时间(小时)' },
                },
            },
        },
    };
    charts.hourly = new Chart(hourlyCanvasCtx, hourlyConfig);
}
/**
 * Render the three summary panels from the current `analysisData`:
 * request method distribution (#methodSummary), referer sources
 * (#refererSummary) and response time statistics (#responseTimeSummary).
 *
 * `method` and `referer` are text taken from the analysed log (the referer is
 * a client-supplied header), so they are HTML-escaped before being written
 * into the markup and the `title` attribute. A null/undefined referer is
 * rendered as an empty label instead of throwing on `.length`.
 */
function updateAnalysisSummary() {
    const HTML_ENTITIES = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;',
    };
    // Escape a value for safe use in both element content and quoted attributes.
    const escapeHtml = value =>
        String(value).replace(/[&<>"']/g, ch => HTML_ENTITIES[ch]);

    // Request method distribution; bar width is 3x the percentage, capped at 100%.
    const methodRows = analysisData.method_distribution.map(item => [
        '',
        '<div style="margin-bottom: 8px; font-size: 0.9em;">',
        '<div style="display: flex; justify-content: space-between;">',
        `<span>${escapeHtml(item.method)}</span>`,
        `<span>${item.count} (${item.percentage}%)</span>`,
        '</div>',
        '<div class="percentage-bar" style="margin-top: 3px;">',
        `<div class="percentage-fill" style="width: ${Math.min(item.percentage * 3, 100)}%"></div>`,
        '</div>',
        '</div>',
        '',
    ].join('\n'));
    document.getElementById('methodSummary').innerHTML =
        '<h4 style="color: #667eea; margin-bottom: 10px;">请求方法分布</h4>' + methodRows.join('');

    // Referer sources; labels longer than 20 characters are truncated and the
    // full value stays available through the title tooltip.
    const refererRows = analysisData.referer_distribution.map(item => {
        const referer = item.referer == null ? '' : String(item.referer);
        // Truncate the raw text first, then escape, so entities are never cut in half.
        const label = referer.length > 20 ? referer.substring(0, 20) + '...' : referer;
        return [
            '',
            '<div style="margin-bottom: 8px; font-size: 0.9em;">',
            '<div style="display: flex; justify-content: space-between;">',
            `<span title="${escapeHtml(referer)}">${escapeHtml(label)}</span>`,
            `<span>${item.count}</span>`,
            '</div>',
            '<div class="percentage-bar" style="margin-top: 3px;">',
            `<div class="percentage-fill" style="width: ${Math.min(item.percentage * 3, 100)}%"></div>`,
            '</div>',
            '</div>',
            '',
        ].join('\n');
    });
    document.getElementById('refererSummary').innerHTML =
        '<h4 style="color: #667eea; margin-bottom: 10px;">Referer来源</h4>' + refererRows.join('');

    // Response time statistics.
    const respTime = analysisData.response_time_stats;
    document.getElementById('responseTimeSummary').innerHTML = [
        '',
        '<h4 style="color: #667eea; margin-bottom: 10px;">响应时间统计</h4>',
        '<div style="font-size: 0.9em; line-height: 1.6;">',
        `<div>平均响应: ${respTime.avg}ms</div>`,
        `<div>最大响应: ${respTime.max}ms</div>`,
        `<div>最小响应: ${respTime.min}ms</div>`,
        `<div>请求总数: ${respTime.total_requests}</div>`,
        '</div>',
        '',
    ].join('\n');
}
/**
 * Ask for confirmation, then request the server to delete all uploaded
 * files and analysis results (POST /clear) and reset the page state.
 *
 * On success the selected-file list and analysis data are cleared, the
 * analyze button is disabled and the analysis section is hidden. Any
 * failure, whether a network error or a non-2xx HTTP response, is reported
 * through showError().
 */
async function clearAll() {
    if (!confirm('确定要清除所有文件和分析结果吗?')) {
        return;
    }
    try {
        const response = await fetch('/clear', { method: 'POST' });
        // fetch() only rejects on network failure; an HTTP error status
        // resolves normally, so it has to be reported explicitly.
        if (!response.ok) {
            showError('清除失败');
            return;
        }
        selectedFiles = [];
        analysisData = null;
        document.getElementById('fileList').innerHTML = '<div style="text-align: center; padding: 20px; color: #999;">暂无文件</div>';
        document.getElementById('analyzeBtn').disabled = true;
        document.getElementById('analysisSection').style.display = 'none';
        showSuccess('已清除所有文件和结果');
    } catch (error) {
        // Keep the underlying cause available in the console for debugging.
        console.error(error);
        showError('清除失败');
    }
}
/**
 * Show a success banner with the given text and hide it again after 5 seconds.
 *
 * @param {string} message Text written into #successText.
 */
function showSuccess(message) {
    const element = document.getElementById('successMessage');
    const text = document.getElementById('successText');
    text.textContent = message;
    element.style.display = 'flex';
    // Cancel the auto-hide scheduled by an earlier call; otherwise that stale
    // timer would hide this newer message before its own 5 seconds are up.
    clearTimeout(showSuccess.hideTimer);
    showSuccess.hideTimer = setTimeout(() => {
        element.style.display = 'none';
    }, 5000);
}
/**
 * Show an error banner with the given text and hide it again after 10 seconds.
 *
 * @param {string} message Text written into #errorText.
 */
function showError(message) {
    const element = document.getElementById('errorMessage');
    const text = document.getElementById('errorText');
    text.textContent = message;
    element.style.display = 'flex';
    // Cancel the auto-hide scheduled by an earlier call; otherwise that stale
    // timer would hide this newer message before its own 10 seconds are up.
    clearTimeout(showError.hideTimer);
    showError.hideTimer = setTimeout(() => {
        element.style.display = 'none';
    }, 10000);
}
/**
 * Hide both the error banner and the success banner immediately.
 */
function hideMessages() {
    for (const bannerId of ['errorMessage', 'successMessage']) {
        document.getElementById(bannerId).style.display = 'none';
    }
}
/**
 * Toggle the #loading indicator.
 *
 * @param {*} show Truthy to display the indicator, falsy to hide it.
 */
function showLoading(show) {
    const indicator = document.getElementById('loading');
    if (show) {
        indicator.style.display = 'block';
    } else {
        indicator.style.display = 'none';
    }
}
</script>
</body>
</html>
run.bat
@echo off
rem Launch app.py located next to this script; %~dp0 expands to the drive and
rem directory of the batch file, so the current working directory does not matter.
python "%~dp0app.py"
rem Wait for a key press so the console window stays open after python exits.
pause
三、运行程序
方法一、从源码运行
安装Python最新版,运行命令确保已安装所有必要的依赖包:
pip install flask pandas
双击启动脚本run.bat,程序会自动打开浏览器并访问 http://localhost:5000,同时在程序所在文件夹内生成analysis_results、uploads两个文件夹,以及cdn_analyzer.log日志文件。
方法二、从成品运行
直接运行已经编译好的成品exe文件,此方法最方便。