Indian Premier League (IPL) Cricket Player Career Progression Analysis

Explore how IPL players' careers have evolved from 2008 to 2025. This notebook demonstrates:

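  • Top run-scorers and wicket-takers career curves
  • Peak performance seasons
  • Player comparisons
  • Debut year impact on career longevity
  • All-round seasons (runs and wickets in the same season)
  • Bowling economy rate trends by season
  • Highest single-season batting strike rates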

Data Source: iplrecords.com

In [35]:
# %pip install pandas matplotlib numpy seaborn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Notebook-wide plot styling: seaborn's white-grid theme, then the default
# figure size and resolution applied in a single rcParams update.
sns.set_style('whitegrid')
plt.rcParams.update({
    'figure.figsize': (14, 6),
    'figure.dpi': 100,
})
In [36]:
# Read the three CSV exports that the rest of the notebook works from.
master, batting, bowling = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/ipl_players_master.csv',
        'data/ipl_batting_career_progression.csv',
        'data/ipl_bowling_career_progression.csv',
    )
)

# One-line size summary per table, followed by the first and last season
# present in the batting table.
print(
    f'Players: {len(master):,}',
    f'Batting records: {len(batting):,}',
    f'Bowling records: {len(bowling):,}',
    f'Seasons: {batting["season"].min():.0f} - {batting["season"].max():.0f}',
    sep='\n',
)
Players: 771
Batting records: 2,782
Bowling records: 2,075
Seasons: 2008 - 2025

1. Top 10 Run-Scorers - Career Progression

In [37]:
# The ten players with the largest `career_runs` in the master table, in
# descending order; that order also drives the legend order below.
top10_batsmen = master.nlargest(10, 'career_runs')['player_name'].to_list()
in_top10 = batting['player_name'].isin(top10_batsmen)
top10_batting = batting[in_top10]

fig, ax = plt.subplots(figsize=(14, 7))
ax.set_title('Top 10 IPL Run-Scorers: Career Progression', fontsize=14)
ax.set_xlabel('Season', fontsize=12)
ax.set_ylabel('Cumulative Runs', fontsize=12)

# One line per player: running total of runs over that player's rows,
# ordered by season.
for name in top10_batsmen:
    by_season = top10_batting.loc[top10_batting['player_name'].eq(name)].sort_values('season')
    ax.plot(
        by_season['season'],
        by_season['runs_scored'].cumsum(),
        marker='o',
        linewidth=2,
        label=name,
    )

# Legend sits outside the axes on the right so it does not cover the curves.
ax.legend(bbox_to_anchor=(1.02, 1), loc='upper left')
plt.tight_layout()
plt.show()
No description has been provided for this image

2. Peak Seasons - Best Batting Years

In [38]:
# For every player keep only the batting row with the highest `runs_scored`,
# then keep rows with at least 400 runs and take the 15 largest.
best_row_index = batting.groupby('player_name')['runs_scored'].idxmax()
peak_batting = (
    batting.loc[best_row_index]
    .loc[lambda rows: rows['runs_scored'] >= 400]
    .nlargest(15, 'runs_scored')
)

# Bar labels look like "Player Name (2016)".
season_text = peak_batting['season'].astype(int).astype(str)
bar_labels = peak_batting['player_name'] + ' (' + season_text + ')'

fig, ax = plt.subplots(figsize=(12, 6))
bars = ax.barh(bar_labels, peak_batting['runs_scored'])
ax.set_title('Best Individual IPL Seasons (400+ Runs)', fontsize=14)
ax.set_xlabel('Runs Scored', fontsize=12)
ax.invert_yaxis()  # largest value at the top

# Write the run total just past the end of each bar, vertically centred.
for rect, runs in zip(bars, peak_batting['runs_scored']):
    y_mid = rect.get_y() + rect.get_height()/2
    ax.text(runs + 10, y_mid, f'{int(runs):,}', va='center')

plt.tight_layout()
plt.show()
No description has been provided for this image

3. Top 10 Wicket-Takers - Career Progression

In [39]:
# The ten players with the largest `career_wickets` in the master table, in
# descending order; that order also drives the legend order below.
top10_bowlers = master.nlargest(10, 'career_wickets')['player_name'].to_list()
in_top10 = bowling['player_name'].isin(top10_bowlers)
top10_bowling = bowling[in_top10]

fig, ax = plt.subplots(figsize=(14, 7))
ax.set_title('Top 10 IPL Wicket-Takers: Career Progression', fontsize=14)
ax.set_xlabel('Season', fontsize=12)
ax.set_ylabel('Cumulative Wickets', fontsize=12)

# One line per bowler: running total of wickets over that bowler's rows,
# ordered by season.
for name in top10_bowlers:
    by_season = top10_bowling.loc[top10_bowling['player_name'].eq(name)].sort_values('season')
    ax.plot(
        by_season['season'],
        by_season['total_wickets'].cumsum(),
        marker='s',
        linewidth=2,
        label=name,
    )

# Legend sits outside the axes on the right so it does not cover the curves.
ax.legend(bbox_to_anchor=(1.02, 1), loc='upper left')
plt.tight_layout()
plt.show()
No description has been provided for this image

4. Player Comparison: Virat Kohli vs Rohit Sharma

In [40]:
players_to_compare = ['Virat Kohli', 'Rohit Sharma']
# Batting rows for the two players only, ordered by season so that each
# per-player line is drawn left to right.
compare = batting[batting['player_name'].isin(players_to_compare)].sort_values('season')

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Each panel: (axes, column to plot, marker, panel title, y-axis label).
panel_specs = (
    (axes[0], 'runs_scored', 'o', 'Season-by-Season Runs', 'Runs'),
    (axes[1], 'batting_strike_rate', 's', 'Season-by-Season Strike Rate', 'Strike Rate'),
)
for panel, column, marker, panel_title, y_label in panel_specs:
    for name in players_to_compare:
        player_rows = compare[compare['player_name'] == name]
        panel.plot(player_rows['season'], player_rows[column], marker=marker, linewidth=2, label=name)
    panel.set_title(panel_title, fontsize=13)
    panel.set_xlabel('Season')
    panel.set_ylabel(y_label)
    panel.legend()

fig.suptitle('Kohli vs Rohit: Batting Comparison', fontsize=14)
plt.tight_layout()
plt.show()
No description has been provided for this image

5. Debut Year vs Career Longevity

In [41]:
# Summarise each debut-year cohort from the master table: how many players
# debuted, their mean number of seasons played and their mean career runs.
# (groupby drops rows whose debut_year is missing.)
debut_counts = master.groupby('debut_year').agg(
    player_count=('player_name', 'count'),
    avg_seasons=('seasons_played', 'mean'),
    avg_runs=('career_runs', 'mean'),  # a mean per player, hence "avg", not "total"
).reset_index()

fig, ax = plt.subplots(figsize=(12, 6))

# Bars: size of each debut cohort.
ax.bar(
    debut_counts['debut_year'],
    debut_counts['player_count'],
    alpha=0.7,
    label='Players debuting',
)
ax.set_xlabel('Debut Year', fontsize=12)
ax.set_ylabel('Number of Players', fontsize=12)
ax.set_title('IPL Debut Cohorts: Size and Average Career Length', fontsize=14)

# Print the cohort size just above each bar.
for year, count in zip(debut_counts['debut_year'], debut_counts['player_count']):
    ax.text(year, count + 1, str(int(count)), ha='center')

# Line on a secondary axis: mean seasons played per cohort. This is the
# longevity measure the section is about.
# NOTE(review): later cohorts have had fewer seasons available to play, so a
# falling line towards recent years is expected and is not by itself evidence
# of shorter careers.
ax2 = ax.twinx()
ax2.plot(
    debut_counts['debut_year'],
    debut_counts['avg_seasons'],
    color='darkorange',
    marker='o',
    linewidth=2,
    label='Avg seasons played',
)
ax2.set_ylabel('Average Seasons Played', fontsize=12)
ax2.grid(False)  # avoid a second, misaligned grid on top of the first

# Single legend covering both axes.
bar_handles, bar_labels = ax.get_legend_handles_labels()
line_handles, line_labels = ax2.get_legend_handles_labels()
ax2.legend(bar_handles + line_handles, bar_labels + line_labels, loc='upper right')

plt.tight_layout()
plt.show()
No description has been provided for this image

6. All-Rounders: Players with 500+ Runs AND 20+ Wickets in a Season

In [42]:
# Thresholds for an "all-round" season. The strict pair is tried first; the
# relaxed pair is reported only when no season clears the strict one. The
# printed messages are built from these values so they cannot drift apart.
STRICT_RUNS, STRICT_WICKETS = 500, 20
RELAXED_RUNS, RELAXED_WICKETS = 200, 10
REPORT_COLUMNS = ['player_name', 'season', 'season_runs', 'season_wickets']

# Total runs per (player, season); summing collapses any repeated rows for
# the same player and season into one.
batting_agg = batting.groupby(['player_name', 'season']).agg(
    season_runs=('runs_scored', 'sum')
).reset_index()

# Total wickets per (player, season), same shape as above.
bowling_agg = bowling.groupby(['player_name', 'season']).agg(
    season_wickets=('total_wickets', 'sum')
).reset_index()

# Inner join (merge's default): only player-seasons present in BOTH tables
# remain. Computed once and reused for the strict and relaxed filters.
season_totals = batting_agg.merge(bowling_agg, on=['player_name', 'season'])

all_rounders = season_totals[
    (season_totals['season_runs'] >= STRICT_RUNS)
    & (season_totals['season_wickets'] >= STRICT_WICKETS)
].sort_values('season_runs', ascending=False)

print(f'Players with {STRICT_RUNS}+ runs AND {STRICT_WICKETS}+ wickets in a single IPL season:')
if len(all_rounders) > 0:
    display(all_rounders[REPORT_COLUMNS])
else:
    print('  None found with these thresholds')
    # Nothing met the strict bar, so fall back to the relaxed thresholds and
    # show the 15 best seasons by runs, then wickets.
    relaxed = season_totals[
        (season_totals['season_runs'] >= RELAXED_RUNS)
        & (season_totals['season_wickets'] >= RELAXED_WICKETS)
    ].sort_values(['season_runs', 'season_wickets'], ascending=False)
    print(f'\nPlayers with {RELAXED_RUNS}+ runs AND {RELAXED_WICKETS}+ wickets in a season ({len(relaxed)} found):')
    display(relaxed.head(15)[REPORT_COLUMNS])
Players with 500+ runs AND 20+ wickets in a single IPL season:
  None found with these thresholds

Players with 200+ runs AND 10+ wickets in a season (47 found):
player_name season season_runs season_wickets
620 Jacques Kallis 2010 572 13
1416 Shane Watson 2013 543 13
134 Andre Russell 2019 510 11
1504 Sunil Narine 2024 488 17
1412 Shane Watson 2008 472 17
149 Andrew Symonds 2010 429 12
722 KA Pollard 2013 420 10
622 Jacques Kallis 2012 409 15
550 Hardik Pandya 2019 402 14
459 Dwayne Bravo 2012 371 15
1295 Rohit Sharma 2009 362 11
1498 Sunil Narine 2018 357 17
888 Marcus Stoinis 2020 352 13
137 Andre Russell 2022 335 17
131 Andre Russell 2015 326 14

7. Economy Rate Trends Over the Years

In [43]:
# Per-season summary of the bowling table. Mean and median are taken over the
# `economy_rate` values of that season's rows with equal weight per row,
# regardless of how much each bowler bowled.
season_economy = (
    bowling
    .groupby('season')
    .agg(
        avg_economy=('economy_rate', 'mean'),
        median_economy=('economy_rate', 'median'),
        total_bowlers=('player_name', 'nunique'),
    )
    .reset_index()
)
seasons = season_economy['season']

fig, ax1 = plt.subplots(figsize=(12, 6))

# Left axis: the two economy-rate trend lines.
for column, marker, label in (
    ('avg_economy', 'o', 'Average Economy'),
    ('median_economy', 's', 'Median Economy'),
):
    ax1.plot(seasons, season_economy[column], marker=marker, linewidth=2, label=label)
ax1.set_title('IPL Bowling Economy Rate Trends (2008-2025)', fontsize=14)
ax1.set_xlabel('Season', fontsize=12)
ax1.set_ylabel('Economy Rate', fontsize=12)
ax1.legend(loc='upper left')

# Right axis: faint bars with the number of distinct bowlers in each season.
ax2 = ax1.twinx()
ax2.bar(seasons, season_economy['total_bowlers'], alpha=0.2, color='green')
ax2.set_ylabel('Number of Bowlers', fontsize=12)

plt.tight_layout()
plt.show()
No description has been provided for this image

8. Highest Strike Rate in a Season (Min 100 Runs)

In [44]:
# Batting rows with at least 100 runs, reduced to the 15 highest strike rates.
qualified = batting[batting['runs_scored'] >= 100]
high_sr = qualified.nlargest(15, 'batting_strike_rate')

# Bar labels look like "Player Name (2019)".
season_text = high_sr['season'].astype(int).astype(str)
bar_labels = high_sr['player_name'] + ' (' + season_text + ')'

fig, ax = plt.subplots(figsize=(12, 6))
bars = ax.barh(bar_labels, high_sr['batting_strike_rate'])
ax.set_title('Highest Batting Strike Rate in a Season (Min 100 Runs)', fontsize=14)
ax.set_xlabel('Strike Rate', fontsize=12)
ax.invert_yaxis()  # highest strike rate at the top

# Annotate each bar with the rounded strike rate and the runs behind it.
for rect, strike_rate, runs in zip(bars, high_sr['batting_strike_rate'], high_sr['runs_scored']):
    y_mid = rect.get_y() + rect.get_height()/2
    ax.text(strike_rate + 1, y_mid, f'{strike_rate:.0f} ({int(runs)} runs)', va='center')

plt.tight_layout()
plt.show()
No description has been provided for this image
In [ ]: