| 1 |
# Load the listening log from CSV, then take a first look at it.
df = pd.read_csv('/datasets/music_project.csv')

# Quick inspection: first ten rows, per-column summary, (rows, columns).
# The bare expressions only display a value in an interactive session.
df.head(10)
df.info()
df.shape
| 5 |
# Rename the seven columns to snake_case identifiers.
# Assigning to `df.columns` replaces `set_axis(..., inplace=True)`: the
# `inplace` argument of DataFrame.set_axis was deprecated in pandas 1.5 and
# removed in pandas 2.0, where the old call raises TypeError.
df.columns = [
    'user_id',
    'track_name',
    'artist_name',
    'genre_name',
    'city',
    'time',
    'weekday',
]
| 6 |
# Missing values: report the per-column counts, then treat each affected
# column in turn, re-checking the counts after every step.
df.isnull().sum()

# Missing track titles are kept as rows under a placeholder label.
df['track_name'] = df['track_name'].fillna('unknown')
df.isnull().sum()

# Missing artist names get the same placeholder.
df['artist_name'] = df['artist_name'].fillna('unknown')
df.isnull().sum()

# Rows without a genre are dropped from the frame in place.
df.dropna(subset=['genre_name'], inplace=True)
df.isnull().sum()
| 13 |
# Fully duplicated rows: count them, drop them, renumber the index from 0,
# and count again to confirm none remain.
df.duplicated().sum()
df = df.drop_duplicates().reset_index(drop=True)
df.duplicated().sum()

# Distinct genre labels present after the clean-up above.
genres_list = df['genre_name'].unique()
| 17 |
def find_genre(name):
    """Count the entries of the module-level ``genres_list`` equal to ``name``.

    Args:
        name: genre label to look for (compared with ``==``).

    Returns:
        Number of matching entries; 0 when ``name`` does not occur.
    """
    return sum(1 for genre in genres_list if genre == name)
| 23 |
# Check whether two alternative spellings occur among the unique genres.
find_genre('hip')
find_genre('hip-hop')
| 25 |
def find_hip_hop(data, wrong):
    """Replace a wrong genre label with 'hiphop' and report what is left.

    Args:
        data: DataFrame with a 'genre_name' column; the column is
            overwritten with the corrected values.
        wrong: exact genre label to replace.

    Returns:
        Number of rows whose 'genre_name' still equals ``wrong`` after the
        replacement (0 when the replacement succeeded).
    """
    # Series.replace returns a new Series; without assigning it back the
    # frame is left unchanged and the check below just counts `wrong`.
    data['genre_name'] = data['genre_name'].replace(wrong, 'hiphop')
    return data[data['genre_name'] == wrong]['genre_name'].count()
| 28 |
# Normalise the 'hip' label in the main frame.
find_hip_hop(df, 'hip')

# Non-null genre entries per city, then per weekday.
df.groupby('city')['genre_name'].count()
df.groupby('weekday')['genre_name'].count()
| 31 |
def number_tracks(data, weekday, city):
    """Count the plays recorded for one weekday in one city.

    Args:
        data: DataFrame with 'weekday', 'city' and 'genre_name' columns.
        weekday: value to match in the 'weekday' column.
        city: value to match in the 'city' column.

    Returns:
        Number of non-null 'genre_name' entries among the matching rows.
    """
    on_day = data['weekday'] == weekday
    in_city = data['city'] == city
    return data.loc[on_day & in_city, 'genre_name'].count()
| 35 |
# Play counts for each (weekday, city) pair that goes into the summary.
mm = number_tracks(df, 'Monday', 'Moscow')
ms = number_tracks(df, 'Monday', 'Saint-Petersburg')
wm = number_tracks(df, 'Wednesday', 'Moscow')
ws = number_tracks(df, 'Wednesday', 'Saint-Petersburg')
fm = number_tracks(df, 'Friday', 'Moscow')
fs = number_tracks(df, 'Friday', 'Saint-Petersburg')

# Summary table: one row per city, one column per weekday.
# The Moscow row uses `wm` and `fm`, the names assigned above; the previous
# `mw` and `mf` were never defined and raised NameError.
number_tracks_table = pd.DataFrame(
    data=[
        ['Moscow', mm, wm, fm],
        ['Saint-Petersburg', ms, ws, fs],
    ],
    columns=['city', 'monday', 'wednesday', 'friday'],
)
| 42 |
# Per-city subsets of the listening log.
moscow_general = df.loc[df['city'] == 'Moscow']
spb_general = df.loc[df['city'] == 'Saint-Petersburg']
| 44 |
def genre_weekday(df, day, time1, time2):
    """Count plays per genre on one weekday inside an open time interval.

    Args:
        df: DataFrame with 'weekday', 'time' and 'genre_name' columns.
        day: value to match in the 'weekday' column.
        time1: exclusive lower bound for the 'time' column.
        time2: exclusive upper bound for the 'time' column.

    Returns:
        Series indexed by genre name with the number of matching rows
        for each genre.
    """
    # A chained comparison (time2 > df['time'] > time1) calls bool() on a
    # Series and raises ValueError, so each bound is tested element-wise
    # and the two masks are combined with `&`.
    in_window = (df['time'] > time1) & (df['time'] < time2)
    genre_list = df[(df['weekday'] == day) & in_window]
    genre_list_sorted = genre_list.groupby('genre_name')['genre_name'].count()
    return genre_list_sorted
| 48 |
# Genre counts for Moscow on Monday with 'time' strictly between the bounds.
# NOTE(review): the bounds are ints; if the 'time' column holds clock
# strings such as 'HH:MM:SS' they should be strings too - confirm the dtype.
genre_weekday(moscow_general, 'Monday', 7, 11)
Комментарии