"""Chart generation for Share of Search analysis."""
from pathlib import Path
from typing import Dict, List, Optional
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from ..utils.errors import VisualizationError
from ..utils.logging import get_logger
logger = get_logger(__name__)
# [docs]  (Sphinx "view source" link marker left over from HTML extraction; not code)
class ChartGenerator:
    """Generate professional charts for Share of Search analysis.

    Every chart is rendered with matplotlib, styled with the shared
    ``MCKINSEY_COLORS`` palette, annotated with a "Source: Google Trends"
    note and written to disk. Rendering failures are re-raised as
    ``VisualizationError`` with the original exception chained.

    Note:
        Constructing an instance mutates global seaborn/matplotlib state
        (style, palette and ``plt.rcParams``), so it affects every figure
        created afterwards in the same process.
    """

    # McKinsey color palette
    MCKINSEY_COLORS = [
        '#003366',  # Navy blue
        '#00A86B',  # Accent green
        '#708090',  # Slate gray
        '#4682B4',  # Steel blue
        '#DC143C',  # Crimson accent
        '#FFD700',  # Gold accent
    ]

    def __init__(self, theme: str = "professional", dpi: int = 300):
        """Initialize the chart generator and apply the visual theme.

        Args:
            theme: Visual theme name (professional, minimal, dark). It is
                stored on the instance; see ``_apply_theme`` for how it is
                (not yet) used.
            dpi: Resolution used when saving charts.
        """
        self.theme = theme
        self.dpi = dpi
        self._apply_theme()

    def _apply_theme(self):
        """Apply the McKinsey-style visual theme to seaborn and matplotlib.

        NOTE(review): ``self.theme`` is not consulted here, so the same
        settings are applied whatever theme name was requested.
        """
        # Clean white background and the shared palette
        sns.set_style("white")
        sns.set_palette(self.MCKINSEY_COLORS)

        settings = {
            # Professional typography (smaller fonts)
            'font.family': 'sans-serif',
            'font.sans-serif': ['Arial', 'Helvetica', 'DejaVu Sans'],
            'font.size': 8,
            'axes.labelsize': 9,
            'axes.titlesize': 10,
            'xtick.labelsize': 7,
            'ytick.labelsize': 7,
            'legend.fontsize': 7,
            'legend.title_fontsize': 8,
            # Clean lines and grids
            'axes.linewidth': 0.5,
            'grid.linewidth': 0.5,
            'grid.alpha': 0.15,
            'axes.edgecolor': '#CCCCCC',
        }
        for key, value in settings.items():
            plt.rcParams[key] = value

    @classmethod
    def _palette(cls, count: int) -> List[str]:
        """Return ``count`` palette colors, cycling when the palette runs out."""
        size = len(cls.MCKINSEY_COLORS)
        return [cls.MCKINSEY_COLORS[i % size] for i in range(count)]

    def _style_axes(self, ax, title: str, grid_axis: str = 'both') -> None:
        """Apply the shared title, subtle grid and hidden top/right spines."""
        ax.set_title(title, fontsize=10, fontweight='normal', loc='left', pad=15)
        ax.grid(True, alpha=0.15, linewidth=0.5, axis=grid_axis, color='#CCCCCC')
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)

    def _save_figure(self, fig, output_path: Path) -> None:
        """Add the source citation, tighten the layout and write ``fig`` to disk."""
        fig.text(0.99, 0.01, 'Source: Google Trends', ha='right', fontsize=6,
                 color='#666666', style='italic')
        fig.tight_layout()
        fig.savefig(output_path, dpi=self.dpi, bbox_inches='tight', facecolor='white')

    def generate_line_chart(
        self,
        df: pd.DataFrame,
        output_path: Path,
        title: str = "Search Interest Over Time"
    ) -> None:
        """Generate a McKinsey-style line chart showing trends over time.

        One line is drawn per distinct ``query`` value, in order of first
        appearance, with points sorted by ``date``.

        Args:
            df: DataFrame with date, query, value columns.
            output_path: Path to save chart.
            title: Chart title.

        Raises:
            VisualizationError: If the chart cannot be rendered or saved.
        """
        fig = None
        try:
            fig, ax = plt.subplots(figsize=(10, 5.5))

            # Plot each query with McKinsey colors
            queries = df['query'].unique()
            for query, color in zip(queries, self._palette(len(queries))):
                query_data = df[df['query'] == query].sort_values('date')
                ax.plot(
                    query_data['date'],
                    query_data['value'],
                    marker='o',
                    markersize=2,
                    linewidth=1.5,
                    label=query,
                    color=color,
                    alpha=0.9,
                )

            ax.set_xlabel('Date', fontsize=9, color='#333333')
            ax.set_ylabel('Search Interest', fontsize=9, color='#333333')
            self._style_axes(ax, title)

            # Professional legend
            ax.legend(loc='upper right', frameon=False, fontsize=7)

            # Clean axis labels (acts on the current axes, i.e. the one just created)
            plt.xticks(rotation=0, ha='center', fontsize=7)
            plt.yticks(fontsize=7)

            self._save_figure(fig, output_path)
            logger.debug(f"Generated McKinsey-style line chart: {output_path}")
        except Exception as e:
            raise VisualizationError(f"Failed to generate line chart: {e}") from e
        finally:
            # Close on success and on failure so figures never accumulate in pyplot
            if fig is not None:
                plt.close(fig)

    def generate_area_chart(
        self,
        df: pd.DataFrame,
        output_path: Path,
        title: str = "Share of Search Evolution"
    ) -> None:
        """Generate a McKinsey-style stacked area chart showing share evolution.

        The data is pivoted to one column per query (mean of
        ``share_of_search`` per date) and the columns are stacked.

        Args:
            df: DataFrame with date, query, share_of_search columns.
            output_path: Path to save chart.
            title: Chart title.

        Raises:
            VisualizationError: If the chart cannot be rendered or saved.
        """
        fig = None
        try:
            # Pivot data for stacked area
            pivot_df = df.pivot_table(
                index='date',
                columns='query',
                values='share_of_search',
                aggfunc='mean',
            )

            fig, ax = plt.subplots(figsize=(10, 5.5))

            # Create stacked area chart with McKinsey colors
            ax.stackplot(
                pivot_df.index,
                *[pivot_df[col] for col in pivot_df.columns],
                labels=pivot_df.columns,
                colors=self._palette(len(pivot_df.columns)),
                alpha=0.85,
            )

            ax.set_xlabel('Date', fontsize=9, color='#333333')
            ax.set_ylabel('Share of Search (%)', fontsize=9, color='#333333')
            self._style_axes(ax, title)

            # Professional legend
            ax.legend(loc='upper right', frameon=False, fontsize=7)

            # Fixed 0-100 range to match the percentage axis label
            ax.set_ylim(0, 100)

            # Clean axis labels (acts on the current axes, i.e. the one just created)
            plt.xticks(rotation=0, ha='center', fontsize=7)
            plt.yticks(fontsize=7)

            self._save_figure(fig, output_path)
            logger.debug(f"Generated McKinsey-style area chart: {output_path}")
        except Exception as e:
            raise VisualizationError(f"Failed to generate area chart: {e}") from e
        finally:
            # Close on success and on failure so figures never accumulate in pyplot
            if fig is not None:
                plt.close(fig)

    def generate_bar_chart(
        self,
        metrics_df: pd.DataFrame,
        output_path: Path,
        title: str = "Average Share by Brand"
    ) -> None:
        """Generate a McKinsey-style horizontal bar chart of average shares.

        Rows are sorted ascending by ``avg_share``, so the bar with the
        largest value is drawn at the top, and each bar is labelled with its
        value.

        Args:
            metrics_df: DataFrame with query and avg_share columns.
            output_path: Path to save chart.
            title: Chart title.

        Raises:
            VisualizationError: If the chart cannot be rendered or saved.
        """
        fig = None
        try:
            fig, ax = plt.subplots(figsize=(10, 5.5))

            # Sort by average share
            plot_data = metrics_df.sort_values('avg_share', ascending=True)
            positions = range(len(plot_data))

            # Create horizontal bar chart with McKinsey colors
            ax.barh(
                positions,
                plot_data['avg_share'],
                color=self.MCKINSEY_COLORS[0],
                alpha=0.85,
            )

            # Set y-axis labels
            ax.set_yticks(positions)
            ax.set_yticklabels(plot_data['query'], fontsize=8)
            ax.set_xlabel('Average Share of Search (%)', fontsize=9, color='#333333')
            self._style_axes(ax, title, grid_axis='x')

            # Add value labels just past the end of each bar
            for i, value in enumerate(plot_data['avg_share']):
                ax.text(
                    value + 0.5,
                    i,
                    f"{value:.1f}%",
                    va='center',
                    fontsize=7,
                    color='#333333',
                )

            self._save_figure(fig, output_path)
            logger.debug(f"Generated McKinsey-style bar chart: {output_path}")
        except Exception as e:
            raise VisualizationError(f"Failed to generate bar chart: {e}") from e
        finally:
            # Close on success and on failure so figures never accumulate in pyplot
            if fig is not None:
                plt.close(fig)

    def generate_pie_chart(
        self,
        metrics_df: pd.DataFrame,
        output_path: Path,
        title: str = "Current Market Share"
    ) -> None:
        """Generate a McKinsey-style pie chart of the market distribution.

        Args:
            metrics_df: DataFrame with query and avg_share columns.
            output_path: Path to save chart.
            title: Chart title.

        Raises:
            VisualizationError: If the chart cannot be rendered or saved.
        """
        fig = None
        try:
            fig, ax = plt.subplots(figsize=(8, 8))

            # Create pie chart with McKinsey colors
            _wedges, texts, autotexts = ax.pie(
                metrics_df['avg_share'].values,
                labels=metrics_df['query'].values,
                autopct='%1.1f%%',
                startangle=90,
                colors=self._palette(len(metrics_df)),
                textprops={'fontsize': 8},
            )

            # Style percentage text
            for autotext in autotexts:
                autotext.set_color('white')
                autotext.set_fontweight('normal')
                autotext.set_fontsize(7)

            # Style labels
            for text in texts:
                text.set_fontsize(8)
                text.set_color('#333333')

            ax.set_title(title, fontsize=10, fontweight='normal', loc='left', pad=15)

            self._save_figure(fig, output_path)
            logger.debug(f"Generated McKinsey-style pie chart: {output_path}")
        except Exception as e:
            raise VisualizationError(f"Failed to generate pie chart: {e}") from e
        finally:
            # Close on success and on failure so figures never accumulate in pyplot
            if fig is not None:
                plt.close(fig)

    def generate_all_charts(
        self,
        df: pd.DataFrame,
        metrics_df: pd.DataFrame,
        output_dir: Path
    ) -> List[Path]:
        """Generate all charts.

        Creates ``output_dir`` if needed, then renders the line, area, bar
        and pie charts into it, in that order.

        Args:
            df: Full time series DataFrame.
            metrics_df: Aggregate metrics DataFrame.
            output_dir: Directory to save charts.

        Returns:
            List of generated chart paths.

        Raises:
            VisualizationError: If any chart fails to generate.
        """
        # Directory creation stays outside the try block so filesystem errors
        # propagate with their original exception type.
        output_dir.mkdir(parents=True, exist_ok=True)

        # (file name, renderer, data) for each chart, in generation order
        jobs = [
            ("line_trends.png", self.generate_line_chart, df),
            ("area_share.png", self.generate_area_chart, df),
            ("bar_average.png", self.generate_bar_chart, metrics_df),
            ("pie_current.png", self.generate_pie_chart, metrics_df),
        ]

        charts = []
        try:
            for filename, render, data in jobs:
                chart_path = output_dir / filename
                render(data, chart_path)
                charts.append(chart_path)
            logger.info(f"Generated {len(charts)} charts in {output_dir}")
            return charts
        except Exception as e:
            raise VisualizationError(f"Failed to generate charts: {e}") from e