一、项目背景1.1 痛点分析数据可视化是汇报的核心环节但传统方式效率极低环节手工方式时间选择图表类型纠结对比20分钟调整配色反复尝试1小时排版布局手动对齐1小时数据标注逐个添加30分钟总计-3小时做12个图表就是36小时将近一周。1.2 技术需求核心需求AI根据数据特征自动推荐图表类型自动配色方案商务/科技/活力自动排版布局多图表对齐支持静态图表matplotlib和交互式图表pyecharts一键导出PNG/HTML/PDF二、技术架构Excel数据 → 数据分析 → AI推荐 → 图表生成 → 排版导出↑ ↑ ↑ ↑ ↑pandas 统计特征 Qwen3 matplotlib reportlabpyecharts技术栈 - **pandas**数据读取和分析 - **matplotlib**静态图表生成 - **pyecharts**交互式图表生成 - **DashScope/Qwen3**AI智能推荐 - **reportlab**PDF报告导出 --- ## 三、环境准备 ### 3.1 安装依赖 bash pip install pandas matplotlib pyecharts dashscope openpyxl reportlab3.2 配置# config.py DASHSCOPE_API_KEY your-api-key-here # 配色方案 COLOR_SCHEMES { business: [#2C3E50, #3498DB, #2ECC71, #E74C3C, #F39C12, #9B59B6], tech: [#0D1B2A, #1B2838, #2196F3, #00BCD4, #4CAF50, #FF5722], vibrant: [#FF6B6B, #4ECDC4, #45B7D1, #96E6A1, #DDA0DD, #F7DC6F] }四、核心模块实现4.1 数据分析模块自动分析数据特征为图表推荐提供依据import pandas as pd import numpy as np class DataAnalyzer: def __init__(self): self.features {} def analyze(self, df): 分析数据特征 self.features { row_count: len(df), col_count: len(df.columns), numeric_cols: list(df.select_dtypes(include[np.number]).columns), category_cols: list(df.select_dtypes(include[object]).columns), datetime_cols: list(df.select_dtypes(include[datetime64]).columns), has_time_series: self._detect_time_series(df), has_categories: len(df.select_dtypes(include[object]).columns) 0, category_counts: {}, correlations: {} } # 分类列的唯一值数量 for col in self.features[category_cols]: self.features[category_counts][col] df[col].nunique() # 数值列相关性 numeric_df df.select_dtypes(include[np.number]) if len(numeric_df.columns) 1: self.features[correlations] numeric_df.corr().to_dict() return self.features def _detect_time_series(self, df): 检测是否为时间序列数据 for col in df.columns: try: pd.to_datetime(df[col]) return True except (ValueError, TypeError): continue return False def get_summary(self): 生成数据摘要供AI分析 summary f 数据概况 - 行数{self.features[row_count]} - 列数{self.features[col_count]} - 数值列{, .join(self.features[numeric_cols])} - 分类列{, .join(self.features[category_cols])} - 是否时间序列{self.features[has_time_series]} - 分类列唯一值{self.features[category_counts]} return summary4.2 AI图表推荐模块用Qwen3根据数据特征推荐最佳图表类型import dashscope import json class ChartRecommender: def __init__(self, api_key): dashscope.api_key api_key def recommend(self, data_summary, purpose汇报): AI推荐图表类型 prompt f 你是一位数据可视化专家。根据以下数据特征推荐最合适的图表组合。 数据摘要 {data_summary} 用途{purpose} 请返回JSON格式包含推荐的图表列表 {{ charts: [ {{ type: 图表类型line/bar/pie/scatter/radar/heatmap, title: 图表标题, x_col: X轴数据列名, y_col: Y轴数据列名, reason: 推荐理由 }} ], color_scheme: 推荐配色方案business/tech/vibrant, layout: 布局建议2x2/3x2/1x3 }} 规则 1. 时间序列数据优先用折线图 2. 分类占比用饼图分类≤6个 3. 分类对比用柱状图 4. 两个数值变量的关系用散点图 5. 多维度评估用雷达图 6. 最多推荐6个图表 response dashscope.Generation.call( modelqwen3-72b, messages[{role: user, content: prompt}], result_formatmessage ) content response.output.choices[0].message.content import re json_match re.search(r\{.*\}, content, re.DOTALL) if json_match: return json.loads(json_match.group()) return None4.3 静态图表生成模块matplotlib根据AI推荐自动生成图表import matplotlib.pyplot as plt import matplotlib matplotlib.rcParams[font.sans-serif] [SimHei] matplotlib.rcParams[axes.unicode_minus] False class StaticChartGenerator: def __init__(self, color_schemebusiness): self.colors COLOR_SCHEMES.get(color_scheme, COLOR_SCHEMES[business]) def generate(self, df, chart_config, output_pathdashboard.png): 根据配置生成图表 charts chart_config[charts] layout chart_config.get(layout, 2x2) rows, cols map(int, layout.split(x)) fig, axes plt.subplots(rows, cols, figsize(7*cols, 5*rows)) fig.suptitle(数据分析报告, fontsize18, fontweightbold) if rows * cols 1: axes [[axes]] elif rows 1 or cols 1: axes [axes] if rows 1 else [[ax] for ax in axes] for idx, chart in enumerate(charts): if idx rows * cols: break r, c divmod(idx, cols) ax axes[r][c] if isinstance(axes[r], list) else axes[r] chart_type chart[type] title chart[title] if chart_type line: self._draw_line(ax, df, chart) elif chart_type bar: self._draw_bar(ax, df, chart) elif chart_type pie: self._draw_pie(ax, df, chart) elif chart_type scatter: self._draw_scatter(ax, df, chart) ax.set_title(title, fontsize13, fontweightbold, pad10) plt.tight_layout() plt.savefig(output_path, dpi150, bbox_inchestight) print(f图表已生成{output_path}) def _draw_line(self, ax, df, config): 折线图 x df[config[x_col]] y df[config[y_col]] ax.plot(x, y, markero, colorself.colors[0], linewidth2) ax.fill_between(range(len(x)), y, alpha0.15, colorself.colors[0]) ax.set_xlabel(config[x_col]) ax.set_ylabel(config[y_col]) ax.grid(True, alpha0.3) # 标注最大值 max_idx y.idxmax() ax.annotate(f{y[max_idx]:.0f}, xy(max_idx, y[max_idx]), fontsize10, colorself.colors[3], fontweightbold) def _draw_bar(self, ax, df, config): 柱状图 if config.get(x_col) in df.columns: data df.groupby(config[x_col])[config[y_col]].sum() else: data df[config[y_col]] bars ax.bar(range(len(data)), data.values, colorself.colors[:len(data)]) ax.set_xticks(range(len(data))) ax.set_xticklabels(data.index, rotation30, haright) # 数据标签 for bar in bars: height bar.get_height() ax.text(bar.get_x() bar.get_width()/2., height, f{height:,.0f}, hacenter, vabottom, fontsize9) def _draw_pie(self, ax, df, config): 饼图 data df.groupby(config[x_col])[config[y_col]].sum() wedges, texts, autotexts ax.pie( data.values, labelsdata.index, autopct%1.1f%%, colorsself.colors[:len(data)], startangle90, textprops{fontsize: 10} ) for autotext in autotexts: autotext.set_fontweight(bold) def _draw_scatter(self, ax, df, config): 散点图 x df[config[x_col]] y df[config[y_col]] ax.scatter(x, y, cself.colors[0], alpha0.6, s50, edgecolorswhite) # 趋势线 z np.polyfit(x, y, 1) p np.poly1d(z) ax.plot(x.sort_values(), p(x.sort_values()), colorself.colors[3], linestyle--, alpha0.8) ax.set_xlabel(config[x_col]) ax.set_ylabel(config[y_col]) ax.grid(True, alpha0.3)4.4 交互式图表生成模块pyecharts生成可交互的HTML图表from pyecharts.charts import Line, Bar, Pie, Scatter, Grid, Page from pyecharts import options as opts class InteractiveChartGenerator: def __init__(self, color_schemebusiness): self.colors COLOR_SCHEMES.get(color_scheme, COLOR_SCHEMES[business]) def generate_line(self, df, x_col, y_col, title): 交互式折线图 chart ( Line() .add_xaxis(df[x_col].tolist()) .add_yaxis( y_col, df[y_col].tolist(), is_smoothTrue, areastyle_optsopts.AreaStyleOpts(opacity0.3), linestyle_optsopts.LineStyleOpts(width3) ) .set_global_opts( title_optsopts.TitleOpts(titletitle), tooltip_optsopts.TooltipOpts(triggeraxis), toolbox_optsopts.ToolboxOpts(is_showTrue), datazoom_optsopts.DataZoomOpts() ) .set_colors(self.colors) ) return chart def generate_bar(self, df, x_col, y_col, title): 交互式柱状图 data df.groupby(x_col)[y_col].sum() chart ( Bar() .add_xaxis(data.index.tolist()) .add_yaxis( y_col, data.values.tolist(), itemstyle_optsopts.ItemStyleOpts( colorself.colors[0] ) ) .set_global_opts( title_optsopts.TitleOpts(titletitle), tooltip_optsopts.TooltipOpts(triggeraxis), toolbox_optsopts.ToolboxOpts(is_showTrue) ) ) return chart def generate_dashboard(self, charts, output_pathdashboard.html): 生成仪表盘 page Page(layoutPage.SimplePageLayout) for chart in charts: page.add(chart) page.render(output_path) print(f交互式图表已生成{output_path})五、完整流程整合def create_report(excel_path, api_key, output_formatstatic): 生成数据分析报告 print(开始生成报告...) # 1. 读取数据 df pd.read_excel(excel_path) print(f✅ 数据读取完成{len(df)}行 × {len(df.columns)}列) # 2. 分析数据特征 analyzer DataAnalyzer() features analyzer.analyze(df) summary analyzer.get_summary() print(✅ 数据分析完成) # 3. AI推荐图表 recommender ChartRecommender(api_key) chart_config recommender.recommend(summary) print(f✅ AI推荐{len(chart_config[charts])}个图表) # 4. 生成图表 if output_format static: generator StaticChartGenerator(chart_config.get(color_scheme, business)) generator.generate(df, chart_config, report.png) else: generator InteractiveChartGenerator(chart_config.get(color_scheme, business)) charts [] for config in chart_config[charts]: if config[type] line: charts.append(generator.generate_line(df, config[x_col], config[y_col], config[title])) elif config[type] bar: ...(truncated)...