-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathNetflix_Analysis.py
More file actions
68 lines (59 loc) · 2.46 KB
/
Netflix_Analysis.py
File metadata and controls
68 lines (59 loc) · 2.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# Read the 2023 Netflix content CSV
df = pd.read_csv('netflix_content_2023.csv')

# 'Hours Viewed' is handled as text containing commas: drop them, then cast to int64
hours_text = df['Hours Viewed'].str.replace(',', '', regex=False)
df['Hours Viewed'] = hours_text.astype(np.int64)

# Parse release dates; unparseable values are coerced to NaT instead of raising
release_dates = pd.to_datetime(df['Release Date'], errors='coerce')
df['Release Date'] = release_dates
df['Release Month'] = release_dates.dt.month
df['Release Day'] = release_dates.dt.day_name()

# Season table, inverted into a month-number -> season-label lookup
months_by_season = {
    'Winter': (12, 1, 2),
    'Spring': (3, 4, 5),
    'Summer': (6, 7, 8),
    'Fall': (9, 10, 11),
}
season_of_month = {
    month: season
    for season, months in months_by_season.items()
    for month in months
}
# Rows without a release month get no season (NaN)
df['Release Season'] = df['Release Month'].map(season_of_month)
# Set style
sns.set(style='whitegrid')

# Plot 1: Content Type Distribution (number of rows per content type)
plt.figure(figsize=(6, 4))
sns.countplot(data=df, x='Content Type', palette='Set2', hue='Content Type', legend=False)
plt.title('Content Type Distribution')
plt.tight_layout()
plt.show()

# Plot 2: Top 10 Most Watched Titles
top10 = df.sort_values('Hours Viewed', ascending=False).head(10)
plt.figure(figsize=(10, 6))
# hue mirrors the categorical axis, as in Plot 1: seaborn >= 0.13 deprecates
# passing `palette` without `hue`; legend=False keeps the chart unchanged.
sns.barplot(data=top10, y='Title', x='Hours Viewed', palette='magma', hue='Title', legend=False)
plt.title('Top 10 Most Watched Netflix Titles (2023)')
plt.tight_layout()
plt.show()

# Plot 3: Viewership by Season (total hours per release season, in calendar order)
season_order = ['Winter', 'Spring', 'Summer', 'Fall']
seasonal = df.groupby('Release Season')['Hours Viewed'].sum().reindex(season_order)
# Explicit new figure so the bars never land on a previous figure's axes
# when plt.show() does not block (interactive sessions).
plt.figure()
seasonal.plot(kind='bar', color='skyblue', title='Viewership by Release Season')
plt.ylabel('Hours Viewed')
plt.tight_layout()
plt.show()

# Plot 4: Top 10 Languages by Viewership (total hours per language indicator)
top_langs = df.groupby('Language Indicator')['Hours Viewed'].sum().sort_values(ascending=False).head(10)
plt.figure()
top_langs.plot(kind='bar', color='coral', title='Top 10 Languages by Viewership')
plt.ylabel('Total Hours Viewed')
plt.tight_layout()
plt.show()

# Plot 5: Content Releases by Day (release counts, Monday through Sunday)
day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
release_day_counts = df['Release Day'].value_counts().reindex(day_order)
plt.figure()
release_day_counts.plot(kind='bar', color='mediumseagreen', title='Content Releases by Day')
plt.ylabel('Number of Releases')
plt.tight_layout()
plt.show()
# Save the Content Type Distribution chart to assets/plot1_content_type.png.
# The chart is drawn first and saved *before* plt.show(): a savefig issued
# right after a previous show() has no freshly drawn figure to write, so it
# would store the wrong (typically blank) image.
output_path = Path('assets') / 'plot1_content_type.png'
# savefig does not create missing directories; make sure the target exists
output_path.parent.mkdir(parents=True, exist_ok=True)

plt.figure(figsize=(6, 4))
sns.countplot(data=df, x='Content Type', palette='Set2', hue='Content Type', legend=False)
plt.title('Content Type Distribution')
plt.tight_layout()
plt.savefig(output_path)  # Save the image
plt.show()