Source code for src.visualization.charts

"""Chart generation for Share of Search analysis."""

from pathlib import Path
from typing import Dict, List, Optional
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

from ..utils.errors import VisualizationError
from ..utils.logging import get_logger

logger = get_logger(__name__)


class ChartGenerator:
    """Generate professional charts for Share of Search analysis.

    Every ``generate_*`` method renders one figure, writes it to disk and
    closes the figure again, even when rendering fails part-way through.
    Any failure is re-raised as ``VisualizationError`` with the original
    exception attached as its cause.
    """

    # McKinsey color palette
    MCKINSEY_COLORS = [
        '#003366',  # Navy blue
        '#00A86B',  # Accent green
        '#708090',  # Slate gray
        '#4682B4',  # Steel blue
        '#DC143C',  # Crimson accent
        '#FFD700',  # Gold accent
    ]

    # Styling values shared by every chart.
    _TEXT_COLOR = '#333333'
    _GRID_COLOR = '#CCCCCC'
    _SOURCE_NOTE = 'Source: Google Trends'

    def __init__(self, theme: str = "professional", dpi: int = 300):
        """
        Initialize chart generator.

        Args:
            theme: Visual theme (professional, minimal, dark). The value is
                stored on the instance; ``_apply_theme`` currently applies
                the same settings regardless of it.
            dpi: Chart resolution used when saving figures
        """
        self.theme = theme
        self.dpi = dpi
        self._apply_theme()

    def _apply_theme(self):
        """Apply McKinsey-style visual theme to charts.

        This writes to seaborn's global style/palette and to
        ``plt.rcParams``, so it affects every figure created afterwards in
        the process, not only those produced by this instance.
        """
        # Clean white background
        sns.set_style("white")

        # Set McKinsey color palette
        sns.set_palette(self.MCKINSEY_COLORS)

        plt.rcParams.update({
            # Professional typography (smaller fonts)
            'font.family': 'sans-serif',
            'font.sans-serif': ['Arial', 'Helvetica', 'DejaVu Sans'],
            'font.size': 8,
            'axes.labelsize': 9,
            'axes.titlesize': 10,
            'xtick.labelsize': 7,
            'ytick.labelsize': 7,
            'legend.fontsize': 7,
            'legend.title_fontsize': 8,
            # Clean lines and grids
            'axes.linewidth': 0.5,
            'grid.linewidth': 0.5,
            'grid.alpha': 0.15,
            'axes.edgecolor': '#CCCCCC',
        })

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @classmethod
    def _palette(cls, count: int) -> List[str]:
        """Return ``count`` palette colors, repeating the palette if needed."""
        base = cls.MCKINSEY_COLORS
        return [base[i % len(base)] for i in range(count)]

    @staticmethod
    def _require_columns(frame: pd.DataFrame, required) -> None:
        """Raise ``ValueError`` naming any ``required`` column absent from ``frame``."""
        missing = [name for name in required if name not in frame.columns]
        if missing:
            raise ValueError(
                f"DataFrame is missing required column(s): {', '.join(missing)}"
            )

    def _style_axes(self, ax, title: str, grid_axis: str = 'both') -> None:
        """Apply the shared title, subtle grid and open-frame look to ``ax``."""
        ax.set_title(title, fontsize=10, fontweight='normal', loc='left', pad=15)
        ax.grid(True, alpha=0.15, linewidth=0.5, axis=grid_axis,
                color=self._GRID_COLOR)
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)

    @staticmethod
    def _style_ticks(ax) -> None:
        """Use small, unrotated tick labels on both axes."""
        # tick_params also applies to ticks created later (e.g. when a date
        # locator re-ticks at draw time), unlike restyling existing labels.
        ax.tick_params(axis='both', labelsize=7)
        ax.tick_params(axis='x', labelrotation=0)

    def _save_figure(self, fig, output_path: Path) -> None:
        """Add the source citation to ``fig`` and write it to ``output_path``."""
        fig.text(0.99, 0.01, self._SOURCE_NOTE, ha='right', fontsize=6,
                 color='#666666', style='italic')
        fig.tight_layout()
        fig.savefig(output_path, dpi=self.dpi, bbox_inches='tight',
                    facecolor='white')

    # ------------------------------------------------------------------
    # Public chart generators
    # ------------------------------------------------------------------

    def generate_line_chart(
        self,
        df: pd.DataFrame,
        output_path: Path,
        title: str = "Search Interest Over Time"
    ) -> None:
        """
        Generate McKinsey-style line chart showing trends over time.

        One line is drawn per distinct ``query`` value, ordered by ``date``.

        Args:
            df: DataFrame with date, query, value columns
            output_path: Path to save chart
            title: Chart title

        Raises:
            VisualizationError: If a required column is missing or
                rendering/saving fails.
        """
        try:
            self._require_columns(df, ('date', 'query', 'value'))
            fig, ax = plt.subplots(figsize=(10, 5.5))
            try:
                # Plot each query with McKinsey colors
                queries = df['query'].unique()
                for query, color in zip(queries, self._palette(len(queries))):
                    query_data = df[df['query'] == query].sort_values('date')
                    ax.plot(
                        query_data['date'],
                        query_data['value'],
                        marker='o',
                        markersize=2,
                        linewidth=1.5,
                        label=query,
                        color=color,
                        alpha=0.9
                    )

                ax.set_xlabel('Date', fontsize=9, color=self._TEXT_COLOR)
                ax.set_ylabel('Search Interest', fontsize=9,
                              color=self._TEXT_COLOR)

                # Professional legend
                ax.legend(loc='upper right', frameon=False, fontsize=7)

                self._style_axes(ax, title)
                self._style_ticks(ax)
                self._save_figure(fig, output_path)
            finally:
                # Always release the figure, including on failure.
                plt.close(fig)

            logger.debug(f"Generated McKinsey-style line chart: {output_path}")

        except Exception as e:
            raise VisualizationError(f"Failed to generate line chart: {e}") from e

    def generate_area_chart(
        self,
        df: pd.DataFrame,
        output_path: Path,
        title: str = "Share of Search Evolution"
    ) -> None:
        """
        Generate McKinsey-style stacked area chart showing share evolution.

        The data is pivoted to one column per ``query`` (mean of
        ``share_of_search`` per date) and stacked on a fixed 0-100 y-axis.

        Args:
            df: DataFrame with date, query, share_of_search columns
            output_path: Path to save chart
            title: Chart title

        Raises:
            VisualizationError: If a required column is missing or
                rendering/saving fails.
        """
        try:
            self._require_columns(df, ('date', 'query', 'share_of_search'))

            # Pivot data for stacked area
            pivot_df = df.pivot_table(
                index='date',
                columns='query',
                values='share_of_search',
                aggfunc='mean'
            )

            fig, ax = plt.subplots(figsize=(10, 5.5))
            try:
                # Create stacked area chart with McKinsey colors
                ax.stackplot(
                    pivot_df.index,
                    *[pivot_df[col] for col in pivot_df.columns],
                    labels=pivot_df.columns,
                    colors=self._palette(len(pivot_df.columns)),
                    alpha=0.85
                )

                ax.set_xlabel('Date', fontsize=9, color=self._TEXT_COLOR)
                ax.set_ylabel('Share of Search (%)', fontsize=9,
                              color=self._TEXT_COLOR)

                # Professional legend
                ax.legend(loc='upper right', frameon=False, fontsize=7)

                self._style_axes(ax, title)
                ax.set_ylim(0, 100)
                self._style_ticks(ax)
                self._save_figure(fig, output_path)
            finally:
                # Always release the figure, including on failure.
                plt.close(fig)

            logger.debug(f"Generated McKinsey-style area chart: {output_path}")

        except Exception as e:
            raise VisualizationError(f"Failed to generate area chart: {e}") from e

    def generate_bar_chart(
        self,
        metrics_df: pd.DataFrame,
        output_path: Path,
        title: str = "Average Share by Brand"
    ) -> None:
        """
        Generate McKinsey-style bar chart comparing average shares.

        Bars are horizontal and sorted ascending by ``avg_share``; each bar
        carries a ``"<value>%"`` label to its right.

        Args:
            metrics_df: DataFrame with query and avg_share columns
            output_path: Path to save chart
            title: Chart title

        Raises:
            VisualizationError: If a required column is missing or
                rendering/saving fails.
        """
        try:
            self._require_columns(metrics_df, ('query', 'avg_share'))
            fig, ax = plt.subplots(figsize=(10, 5.5))
            try:
                # Sort by average share
                plot_data = metrics_df.sort_values('avg_share', ascending=True)
                positions = range(len(plot_data))

                # Create horizontal bar chart with McKinsey colors
                ax.barh(
                    positions,
                    plot_data['avg_share'],
                    color=self.MCKINSEY_COLORS[0],
                    alpha=0.85
                )

                # Set y-axis labels
                ax.set_yticks(positions)
                ax.set_yticklabels(plot_data['query'], fontsize=8)
                ax.set_xlabel('Average Share of Search (%)', fontsize=9,
                              color=self._TEXT_COLOR)

                # Grid lines only along the value axis
                self._style_axes(ax, title, grid_axis='x')

                # Add value labels on bars
                for i, value in enumerate(plot_data['avg_share']):
                    ax.text(
                        value + 0.5,
                        i,
                        f"{value:.1f}%",
                        va='center',
                        fontsize=7,
                        color=self._TEXT_COLOR
                    )

                self._save_figure(fig, output_path)
            finally:
                # Always release the figure, including on failure.
                plt.close(fig)

            logger.debug(f"Generated McKinsey-style bar chart: {output_path}")

        except Exception as e:
            raise VisualizationError(f"Failed to generate bar chart: {e}") from e

    def generate_pie_chart(
        self,
        metrics_df: pd.DataFrame,
        output_path: Path,
        title: str = "Current Market Share"
    ) -> None:
        """
        Generate McKinsey-style pie chart showing current market distribution.

        Wedge sizes come from ``avg_share`` and wedge labels from ``query``.

        Args:
            metrics_df: DataFrame with query and avg_share columns
            output_path: Path to save chart
            title: Chart title

        Raises:
            VisualizationError: If a required column is missing or
                rendering/saving fails.
        """
        try:
            self._require_columns(metrics_df, ('query', 'avg_share'))
            fig, ax = plt.subplots(figsize=(8, 8))
            try:
                # Create pie chart with McKinsey colors
                _wedges, texts, autotexts = ax.pie(
                    metrics_df['avg_share'].values,
                    labels=metrics_df['query'].values,
                    autopct='%1.1f%%',
                    startangle=90,
                    colors=self._palette(len(metrics_df)),
                    textprops={'fontsize': 8}
                )

                # Style percentage text
                for autotext in autotexts:
                    autotext.set_color('white')
                    autotext.set_fontweight('normal')
                    autotext.set_fontsize(7)

                # Style labels
                for text in texts:
                    text.set_fontsize(8)
                    text.set_color(self._TEXT_COLOR)

                ax.set_title(title, fontsize=10, fontweight='normal',
                             loc='left', pad=15)

                self._save_figure(fig, output_path)
            finally:
                # Always release the figure, including on failure.
                plt.close(fig)

            logger.debug(f"Generated McKinsey-style pie chart: {output_path}")

        except Exception as e:
            raise VisualizationError(f"Failed to generate pie chart: {e}") from e

    def generate_all_charts(
        self,
        df: pd.DataFrame,
        metrics_df: pd.DataFrame,
        output_dir: Path
    ) -> List[Path]:
        """
        Generate all charts.

        Creates ``output_dir`` (including parents) if needed, then writes
        ``line_trends.png``, ``area_share.png``, ``bar_average.png`` and
        ``pie_current.png`` into it, in that order. Generation stops at the
        first chart that fails.

        Args:
            df: Full time series DataFrame
            metrics_df: Aggregate metrics DataFrame
            output_dir: Directory to save charts

        Returns:
            List of generated chart paths

        Raises:
            VisualizationError: If any individual chart fails.
        """
        output_dir.mkdir(parents=True, exist_ok=True)

        # (file name, generator, input frame) in rendering order
        jobs = [
            ("line_trends.png", self.generate_line_chart, df),
            ("area_share.png", self.generate_area_chart, df),
            ("bar_average.png", self.generate_bar_chart, metrics_df),
            ("pie_current.png", self.generate_pie_chart, metrics_df),
        ]

        charts = []
        try:
            for file_name, generate, frame in jobs:
                chart_path = output_dir / file_name
                generate(frame, chart_path)
                charts.append(chart_path)

            logger.info(f"Generated {len(charts)} charts in {output_dir}")
            return charts

        except Exception as e:
            raise VisualizationError(f"Failed to generate charts: {e}") from e