-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathdata_fetcher.py
More file actions
64 lines (46 loc) · 1.92 KB
/
data_fetcher.py
File metadata and controls
64 lines (46 loc) · 1.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
class Fetcher(BaseEstimator, TransformerMixin):
    """Select the rows of one building/meter and add calendar features.

    The transformer expects a DataFrame holding (at least) the columns
    ``'Unnamed: 0'``, ``'precip_depth_1_hr'``, ``'cloud_coverage'``,
    ``'site_id'``, ``'square_feet'``, ``'building_id'``, ``'meter'``,
    ``'primary_use'`` and ``'timestamp'``.  A ``'month'`` column is used for
    the season feature when present; otherwise the month is taken from
    ``'timestamp'``.

    Parameters
    ----------
    building_id : value compared against the ``building_id`` column.
    meter : value compared against the ``meter`` column.
    primary_use : value compared against the ``primary_use`` column.
        The sentinel ``99`` means "do not filter on primary_use".
    """

    def __init__(self, building_id=122, meter=0, primary_use=99):  # 99 refers to none
        self.building_id = building_id
        self.meter = meter
        self.primary_use = primary_use

    def fit(self, x, y=None):
        """Nothing is learned from the data; return ``self``."""
        return self  # nothing else to do

    def season_finder(self, month):
        """Map a month number to a season code.

        Returns 0 for spring (3-5), 1 for summer (6-8), 2 for fall (9-11),
        3 for winter (12, 1, 2) and ``None`` for any other value.
        """
        if month in (3, 4, 5):
            return 0
        if month in (6, 7, 8):
            return 1
        if month in (9, 10, 11):
            return 2
        if month in (12, 1, 2):
            return 3
        return None

    def transform(self, x):
        """Return the filtered frame, indexed by timestamp, with extra features.

        Steps: drop the unused columns, keep only the rows matching
        ``building_id``/``meter`` (and ``primary_use`` unless it is 99), drop
        the filter columns, parse ``timestamp`` and add ``season``,
        ``weekend`` and ``day_of_the_week``.  The input frame is not modified.

        Raises ``KeyError`` (from ``DataFrame.drop``) if one of the columns
        to drop is missing.
        """
        df = x.copy()
        df = df.drop(['Unnamed: 0', 'precip_depth_1_hr',
                      'cloud_coverage', 'site_id', 'square_feet'], axis=1)

        # These locals are referenced from the query string through '@'.
        # Binding values instead of interpolating them into the expression
        # keeps the filter valid for non-numeric values (e.g. a string
        # primary_use).
        building_id = self.building_id
        meter = self.meter
        primary_use = self.primary_use

        condition = 'building_id == @building_id and meter == @meter'
        if primary_use != 99:
            condition += ' and primary_use == @primary_use'

        # Non-inplace drop returns a fresh frame, so the column assignments
        # below never write into a slice of the query result.
        df = df.query(condition).drop(
            ['building_id', 'meter', 'primary_use'], axis=1)

        # Plain column assignment replaces the column and its dtype.
        # `df.loc[:, col] = ...` writes in place on pandas >= 2.0, leaving an
        # object column on which the `.dt` accessor below would fail.
        df['timestamp'] = pd.to_datetime(df['timestamp'])

        # Use the existing 'month' column when the input provides one;
        # otherwise fall back to the month of the parsed timestamp.
        months = df['month'] if 'month' in df.columns else df['timestamp'].dt.month
        df['season'] = months.apply(self.season_finder)

        day_of_week = df['timestamp'].dt.dayofweek  # Monday=0 ... Sunday=6
        df['weekend'] = day_of_week > 4
        df['day_of_the_week'] = day_of_week
        df.set_index('timestamp', inplace=True)
        return df
class Do_nothing(BaseEstimator, TransformerMixin):
    """Pipeline step without parameters that casts its input to ``int``."""

    def __init__(self):
        # No hyper-parameters to store.
        pass

    def fit(self, x, y=None):
        """Nothing is learned from the data; return the transformer itself."""
        return self

    def transform(self, x):
        """Return ``x`` converted with ``astype(int)``."""
        as_int = x.astype(int)
        return as_int