# imports
# Standard library
import re
import json

# Data handling, plotting and statistics
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy
from scipy import stats
from scipy.stats import randint

# scikit-learn: decomposition, models, preprocessing, model selection, metrics
import sklearn
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_validate
from sklearn.metrics import accuracy_score

# Text processing
import nltk
from nltk.corpus import stopwords
import spacy
# settings
# Display floats with a thousands separator and one decimal place
# (for example 1234.567 is shown as 1,234.6).
pd.options.display.float_format = '{:,.1f}'.format
# main commands
# NOTE: data_dict, sales, months, orders, customers, amount_filter and
# status_filter are not defined in this file; they must exist before these
# lines are run. The CSV/Excel paths below are placeholders as well.

# Creating objects
pd.DataFrame(data_dict)
data = pd.Series(data=sales, index=months)
sales_df = pd.DataFrame(data=sales, index=months)
df = pd.DataFrame()

# Reading files
df = pd.read_csv('data.csv')
df = pd.read_csv('file.csv', index_col='id')
df = pd.read_excel('file.xlsx', sheet_name='Sheet1')

# Inspecting a DataFrame
df.index
df.columns
df.shape
df.dtypes
df.values
df.info()
df.head()
df.sample()
df.describe()

# Selecting rows
df.loc
df.iloc
# String values inside a query expression must be quoted; a bare word would
# be resolved as a column name instead of the literal text.
df.query('amount > 1000 & status == "success"')
# The @ prefix refers to a Python variable rather than a column.
df.query('amount > @amount_filter & status == @status_filter')

# Aggregating
df.sum()
df.groupby('type').sum()
df.groupby('status').agg('min')
# The per-group minimum of one column is a Series, and Series.sort_values
# takes no `by` argument, so only the sort direction is passed.
df.groupby('status').amount.min().sort_values(ascending=False).head(10)

print(df)

# Joining: match orders.customer_id against the index of customers
orders_with_customers_df = pd.merge(
    orders,
    customers,
    how='inner',
    left_on='customer_id',
    right_index=True,
)
# sklearn
# NOTE: X and y are not defined in this file (presumably the feature matrix
# and the target labels); they must exist before these lines are run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

# Fit the scaler on the training split only, then apply the same
# transformation to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Random forest, trained on the unscaled features
clf = RandomForestClassifier()
clf.fit(X_train, y_train)  # learning
y_pred = clf.predict(X_test)  # make prediction
clf.score(X_train, y_train)
clf.score(X_test, y_test)
np.sum(y_pred == y_test)  # number of predictions equal to the true label
len(y_test)
np.mean(y_pred == y_test)  # share of predictions equal to the true label

# Multi-layer perceptron, trained on the scaled features
# (clf is rebound here, replacing the random forest above)
clf = MLPClassifier(random_state=1, max_iter=500)
clf.fit(X_train_scaled, y_train)
clf.predict_proba(X_test_scaled)
clf.score(X_train_scaled, y_train)
clf.score(X_test_scaled, y_test)
# Comments  (NOTE: this looks like a page label left over from the web export, not code)