Indian Premier League (IPL) Cricket Player Career Progression Analysis¶
Explore how IPL players' careers have evolved from 2008 to 2025. This notebook demonstrates:
- Top run-scorers and wicket-takers career curves
- Peak performance seasons
- Player comparisons
- Debut year impact on career longevity
Data Source: iplrecords.com
In [35]:
# %pip install pandas matplotlib numpy seaborn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Notebook-wide plotting defaults: seaborn grid theme, then figure size / DPI.
sns.set_style('whitegrid')
plt.rcParams.update({
    'figure.figsize': (14, 6),
    'figure.dpi': 100,
})
In [36]:
# Read the three CSV extracts (paths are relative to the notebook's working
# directory) and print a one-line size summary for each.
master, batting, bowling = map(
    pd.read_csv,
    (
        'data/ipl_players_master.csv',
        'data/ipl_batting_career_progression.csv',
        'data/ipl_bowling_career_progression.csv',
    ),
)

print(f'Players: {len(master):,}')
print(f'Batting records: {len(batting):,}')
print(f'Bowling records: {len(bowling):,}')
# Season range is taken from the batting table only.
print(f'Seasons: {batting["season"].min():.0f} - {batting["season"].max():.0f}')
Players: 771 Batting records: 2,782 Bowling records: 2,075 Seasons: 2008 - 2025
1. Top 10 Run-Scorers - Career Progression¶
In [37]:
# Ten highest career run totals in the master table, in descending order;
# this order also determines the legend order and line colours below.
top10_batsmen = master.nlargest(10, 'career_runs')['player_name'].tolist()
top10_batting = batting[batting['player_name'].isin(top10_batsmen)]

fig, ax = plt.subplots(figsize=(14, 7))

for name in top10_batsmen:
    # One line per player: running total of runs across seasons in order.
    career = top10_batting.loc[top10_batting['player_name'] == name].sort_values('season')
    ax.plot(
        career['season'],
        career['runs_scored'].cumsum(),
        marker='o',
        linewidth=2,
        label=name,
    )

ax.set_title('Top 10 IPL Run-Scorers: Career Progression', fontsize=14)
ax.set_xlabel('Season', fontsize=12)
ax.set_ylabel('Cumulative Runs', fontsize=12)
# Legend sits outside the plotting area so it does not hide the curves.
ax.legend(bbox_to_anchor=(1.02, 1), loc='upper left')

plt.tight_layout()
plt.show()
2. Peak Seasons - Best Batting Years¶
In [38]:
# Each player's single highest-scoring season, restricted to 400+ run seasons,
# then the 15 largest of those. Note this is one row per player, so a second
# big season by the same player is intentionally not shown.
#
# The 400-run filter is applied BEFORE idxmax: a player's best season is 400+
# exactly when they have at least one 400+ row, so the selected rows are the
# same, but groups whose runs_scored are all NaN never reach idxmax (which
# would otherwise yield an invalid label and break the .loc lookup).
big_seasons = batting[batting['runs_scored'] >= 400]
peak_rows = big_seasons.groupby('player_name')['runs_scored'].idxmax()
peak_batting = big_seasons.loc[peak_rows].nlargest(15, 'runs_scored')

fig, ax = plt.subplots(figsize=(12, 6))
bars = ax.barh(
    # Bar label is "Player (season)"; season is cast to int to drop any ".0".
    peak_batting['player_name'] + ' (' + peak_batting['season'].astype(int).astype(str) + ')',
    peak_batting['runs_scored']
)
ax.set_xlabel('Runs Scored', fontsize=12)
ax.set_title('Best Individual IPL Seasons (400+ Runs)', fontsize=14)
# barh draws the first row at the bottom; invert so the top scorer is on top.
ax.invert_yaxis()

# Annotate each bar with its run total just past the bar end.
for bar, val in zip(bars, peak_batting['runs_scored']):
    ax.text(val + 10, bar.get_y() + bar.get_height() / 2, f'{int(val):,}', va='center')

plt.tight_layout()
plt.show()
3. Top 10 Wicket-Takers - Career Progression¶
In [39]:
# Ten highest career wicket totals in the master table, in descending order;
# this order also determines the legend order and line colours below.
top10_bowlers = master.nlargest(10, 'career_wickets')['player_name'].tolist()
top10_bowling = bowling[bowling['player_name'].isin(top10_bowlers)]

fig, ax = plt.subplots(figsize=(14, 7))

for name in top10_bowlers:
    # One line per player: running total of wickets across seasons in order.
    career = top10_bowling.loc[top10_bowling['player_name'] == name].sort_values('season')
    ax.plot(
        career['season'],
        career['total_wickets'].cumsum(),
        marker='s',
        linewidth=2,
        label=name,
    )

ax.set_title('Top 10 IPL Wicket-Takers: Career Progression', fontsize=14)
ax.set_xlabel('Season', fontsize=12)
ax.set_ylabel('Cumulative Wickets', fontsize=12)
# Legend sits outside the plotting area so it does not hide the curves.
ax.legend(bbox_to_anchor=(1.02, 1), loc='upper left')

plt.tight_layout()
plt.show()
4. Player Comparison: Virat Kohli vs Rohit Sharma¶
In [40]:
players_to_compare = ['Virat Kohli', 'Rohit Sharma']
# Batting rows for just the two players, ordered by season for line plotting.
compare = batting[batting['player_name'].isin(players_to_compare)].sort_values('season')

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# One entry per panel: (axes, column plotted, marker, panel title, y-axis label).
panels = [
    (axes[0], 'runs_scored', 'o', 'Season-by-Season Runs', 'Runs'),
    (axes[1], 'batting_strike_rate', 's', 'Season-by-Season Strike Rate', 'Strike Rate'),
]

for panel, column, marker, title, ylabel in panels:
    # Players are drawn in the same order on both panels so colours match.
    for player in players_to_compare:
        rows = compare[compare['player_name'] == player]
        panel.plot(rows['season'], rows[column], marker=marker, linewidth=2, label=player)
    panel.set_title(title, fontsize=13)
    panel.set_xlabel('Season')
    panel.set_ylabel(ylabel)
    panel.legend()

plt.suptitle('Kohli vs Rohit: Batting Comparison', fontsize=14)
plt.tight_layout()
plt.show()
5. Debut Year vs Career Longevity¶
In [41]:
# Summarise the player master table per debut season: cohort size, mean number
# of seasons played, and mean career runs.
# NOTE: `total_runs` holds the cohort MEAN of career_runs, not a sum; the column
# name is left unchanged so any existing reference to it keeps working.
debut_counts = master.groupby('debut_year').agg(
    player_count=('player_name', 'count'),
    avg_seasons=('seasons_played', 'mean'),
    total_runs=('career_runs', 'mean')
).reset_index()

fig, ax = plt.subplots(figsize=(12, 6))

# Primary axis: how many players debuted in each season.
ax.bar(debut_counts['debut_year'], debut_counts['player_count'], alpha=0.7)
ax.set_xlabel('Debut Year', fontsize=12)
ax.set_ylabel('Number of Players', fontsize=12)
ax.set_title('Players Debuting Each IPL Season', fontsize=14)

# Print the cohort size just above each bar.
for year, count in zip(debut_counts['debut_year'], debut_counts['player_count']):
    ax.text(year, count + 1, str(int(count)), ha='center')

# Secondary axis: career longevity, i.e. the mean seasons played by each debut
# cohort. This is the quantity the section is about; it was aggregated above
# but previously never drawn.
# Caveat: recent cohorts have had fewer seasons available to play, so a decline
# towards the right-hand side is at least partly mechanical.
ax2 = ax.twinx()
ax2.plot(
    debut_counts['debut_year'],
    debut_counts['avg_seasons'],
    color='darkorange',
    marker='o',
    linewidth=2,
    label='Avg Seasons Played',
)
ax2.set_ylabel('Average Seasons Played', fontsize=12)
# Hide the twin axis' grid so it does not clash with the primary grid lines.
ax2.grid(False)
ax2.legend(loc='upper right')

plt.tight_layout()
plt.show()
6. All-Rounders: Players with 500+ Runs AND 20+ Wickets in a Season¶
In [42]:
# Find player-seasons that combine heavy run-scoring with heavy wicket-taking.
# Step 1: collapse each table to one row per (player_name, season) by summing,
# so multiple rows for the same player and season are combined.
batting_agg = batting.groupby(['player_name', 'season']).agg(
season_runs=('runs_scored', 'sum')
).reset_index()
bowling_agg = bowling.groupby(['player_name', 'season']).agg(
season_wickets=('total_wickets', 'sum')
).reset_index()
# Step 2: default (inner) merge keeps only player-seasons present in BOTH tables.
all_rounders = batting_agg.merge(bowling_agg, on=['player_name', 'season'])
# Step 3: strict thresholds - 500+ runs and 20+ wickets in the same season.
all_rounders = all_rounders[
(all_rounders['season_runs'] >= 500) & (all_rounders['season_wickets'] >= 20)
].sort_values('season_runs', ascending=False)
print('Players with 500+ runs AND 20+ wickets in a single IPL season:')
if len(all_rounders) > 0:
display(all_rounders[['player_name', 'season', 'season_runs', 'season_wickets']])
else:
print(' None found with these thresholds')
# Fallback with relaxed thresholds (200+ runs and 10+ wickets); the merge is
# rebuilt because `all_rounders` has already been filtered down above.
# NOTE(review): indentation was lost in this export - presumably the fallback
# below belongs to the else branch; confirm against the original notebook.
relaxed = batting_agg.merge(bowling_agg, on=['player_name', 'season'])
relaxed = relaxed[
(relaxed['season_runs'] >= 200) & (relaxed['season_wickets'] >= 10)
].sort_values(['season_runs', 'season_wickets'], ascending=False)
print(f'\nPlayers with 200+ runs AND 10+ wickets in a season ({len(relaxed)} found):')
# Only the first 15 rows (highest run totals) are displayed.
display(relaxed.head(15)[['player_name', 'season', 'season_runs', 'season_wickets']])
Players with 500+ runs AND 20+ wickets in a single IPL season: None found with these thresholds Players with 200+ runs AND 10+ wickets in a season (47 found):
| player_name | season | season_runs | season_wickets | |
|---|---|---|---|---|
| 620 | Jacques Kallis | 2010 | 572 | 13 |
| 1416 | Shane Watson | 2013 | 543 | 13 |
| 134 | Andre Russell | 2019 | 510 | 11 |
| 1504 | Sunil Narine | 2024 | 488 | 17 |
| 1412 | Shane Watson | 2008 | 472 | 17 |
| 149 | Andrew Symonds | 2010 | 429 | 12 |
| 722 | KA Pollard | 2013 | 420 | 10 |
| 622 | Jacques Kallis | 2012 | 409 | 15 |
| 550 | Hardik Pandya | 2019 | 402 | 14 |
| 459 | Dwayne Bravo | 2012 | 371 | 15 |
| 1295 | Rohit Sharma | 2009 | 362 | 11 |
| 1498 | Sunil Narine | 2018 | 357 | 17 |
| 888 | Marcus Stoinis | 2020 | 352 | 13 |
| 137 | Andre Russell | 2022 | 335 | 17 |
| 131 | Andre Russell | 2015 | 326 | 14 |
7. Economy Rate Trends Over the Years¶
In [43]:
# Per-season summary of the bowling table: mean and median of the per-row
# economy_rate values, plus the number of distinct bowlers.
# The mean/median are unweighted: every bowling row counts equally.
season_economy = (
    bowling
    .groupby('season')
    .agg(
        avg_economy=('economy_rate', 'mean'),
        median_economy=('economy_rate', 'median'),
        total_bowlers=('player_name', 'nunique'),
    )
    .reset_index()
)

fig, ax1 = plt.subplots(figsize=(12, 6))

# Left axis: the two economy-rate trend lines.
for column, marker, label in (
    ('avg_economy', 'o', 'Average Economy'),
    ('median_economy', 's', 'Median Economy'),
):
    ax1.plot(season_economy['season'], season_economy[column],
             marker=marker, linewidth=2, label=label)

ax1.set_xlabel('Season', fontsize=12)
ax1.set_ylabel('Economy Rate', fontsize=12)
ax1.set_title('IPL Bowling Economy Rate Trends (2008-2025)', fontsize=14)
ax1.legend(loc='upper left')

# Right axis: faint bars giving the number of bowlers behind each season's figures.
ax2 = ax1.twinx()
ax2.bar(season_economy['season'], season_economy['total_bowlers'], alpha=0.2, color='green')
ax2.set_ylabel('Number of Bowlers', fontsize=12)

plt.tight_layout()
plt.show()
8. Highest Strike Rate in a Season (Min 100 Runs)¶
In [44]:
# 15 highest season strike rates among batting rows with at least 100 runs.
qualified = batting[batting['runs_scored'] >= 100]
high_sr = qualified.nlargest(15, 'batting_strike_rate')

fig, ax = plt.subplots(figsize=(12, 6))

# Bar label is "Player (season)"; season is cast to int to drop any ".0".
bar_labels = high_sr['player_name'] + ' (' + high_sr['season'].astype(int).astype(str) + ')'
bars = ax.barh(bar_labels, high_sr['batting_strike_rate'])

ax.set_xlabel('Strike Rate', fontsize=12)
ax.set_title('Highest Batting Strike Rate in a Season (Min 100 Runs)', fontsize=14)
# barh draws the first row at the bottom; invert so the highest rate is on top.
ax.invert_yaxis()

# Annotate each bar with the rounded strike rate and the runs it was scored over.
for bar, strike_rate, runs in zip(bars, high_sr['batting_strike_rate'], high_sr['runs_scored']):
    ax.text(
        strike_rate + 1,
        bar.get_y() + bar.get_height() / 2,
        f'{strike_rate:.0f} ({int(runs)} runs)',
        va='center',
    )

plt.tight_layout()
plt.show()
In [ ]: