| pandas timeseries |
"""Quick-reference snippets for NumPy, pandas, scikit-learn, collections and
itertools, followed by a pandas time-series feature-engineering example.

Statements whose result is not assigned are kept purely as API reminders; the
only output of the script is the final ``print(df)``.
"""

from collections import Counter, defaultdict, deque
from itertools import combinations, permutations, product

import numpy as np
import pandas as pd
from lightgbm import LGBMClassifier as clf  # kept for reference; not used below
from lightgbm import LGBMRegressor as regr  # kept for reference; not used below
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# ---------------------------------------------------------------------------
# NumPy basics
# ---------------------------------------------------------------------------
arr = np.array([1, 2, 3])
# Three elements cannot be split into two rows, so reshape into a column
# vector and let -1 infer the row count.
arr.reshape(-1, 1)
arr.T

# Small sample matrices so the linear-algebra calls below can actually run.
A = np.array([[4.0, 7.0], [2.0, 6.0]])
B = np.array([[1.0, 0.0], [0.0, 1.0]])
np.dot(A, B)
np.linalg.inv(A)

np.mean(arr, axis=0)
np.argmax(arr)
np.where(arr > 0, 1, 0)

# ---------------------------------------------------------------------------
# pandas basics
# ---------------------------------------------------------------------------
df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
df.describe()
df[df['A'] > 1]
df.groupby('A').sum()
df['C'] = df['A'] + df['B']
df.drop_duplicates()
df.fillna(0)

# Both sides of the merge need the join column, so give each frame a 'key'.
df['key'] = ['k1', 'k2']
other_df = pd.DataFrame({'key': ['k1', 'k2'], 'D': [5, 6]})
df.merge(other_df, on='key', how='inner')

# ---------------------------------------------------------------------------
# scikit-learn basics
# ---------------------------------------------------------------------------
# Small synthetic data set with 0/1 labels so the pipeline below can run.
rng = np.random.default_rng(0)
X = rng.normal(size=(50, 3))
y = (X[:, 0] + X[:, 1] > 0).astype(int)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
# Reuse the statistics learned on the training split; the model must see test
# features on the same scale it was trained on.
X_test_scaled = scaler.transform(X_test)

model = LinearRegression()
model.fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)
# Rounding turns the regressor's continuous output into discrete labels,
# which is what accuracy_score compares against y_test.
accuracy_score(y_test, y_pred.round())

# ---------------------------------------------------------------------------
# collections / itertools basics
# ---------------------------------------------------------------------------
d = defaultdict(int)
q = deque([1, 2, 3])
q.appendleft(0)
c = Counter("mississippi")
c.most_common(2)

list(permutations([1, 2, 3]))
list(combinations([1, 2, 3], 2))
list(product([0, 1], repeat=3))

# ---------------------------------------------------------------------------
# pandas time-series feature engineering
# ---------------------------------------------------------------------------
df = pd.DataFrame({
    'timestamp': pd.date_range(start='2024-01-01', periods=10, freq='D'),
    # A constant ticker gives the sector mapping further down a column to map.
    'ticker': ['AAPL'] * 10,
    'price': [100, 102, 101, 103, 105, 107, 106, 108, 110, 112],
    'volume': [500, 550, 520, 580, 600, 620, 610, 630, 650, 700],
})
df['timestamp'] = pd.to_datetime(df['timestamp'])
df.set_index('timestamp', inplace=True)

# Rolling statistics over a 3-row window; the first two rows are NaN.
df['SMA_3'] = df['price'].rolling(window=3).mean()
df['STD_3'] = df['price'].rolling(window=3).std()
df['price_lag_1'] = df['price'].shift(1)
df['rolling_max'] = df['price'].rolling(window=3).max()
df['rolling_min'] = df['price'].rolling(window=3).min()
# True where the current price equals the maximum of its 3-row window.
df['is_max'] = df['price'] == df['rolling_max']

# Change-based features.
df['price_change'] = df['price'].diff()
df['volatility'] = df['price_change'].abs().rolling(window=3).mean()
df['log_return'] = np.log(df['price'] / df['price'].shift(1))

# Tickers missing from the map become NaN in 'sector'.
sector_map = {'AAPL': 'Tech', 'TSLA': 'Auto', 'XOM': 'Energy'}
df['sector'] = df['ticker'].map(sector_map)

# Min-max scale the price into [0, 1].
df['price_norm'] = (
    (df['price'] - df['price'].min())
    / (df['price'].max() - df['price'].min())
)

df_filtered = df[df['volume'] > 600]

df['day_of_week'] = df.index.dayofweek
df_grouped = df.groupby('day_of_week')['price'].mean()

# Lagged copies of the price for lags 1 to 3 (lag 1 is rewritten in place).
for lag in range(1, 4):
    df[f'price_lag_{lag}'] = df['price'].shift(lag)

print(df)
Комментарии