-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathutils.py
More file actions
28 lines (26 loc) · 1.04 KB
/
utils.py
File metadata and controls
28 lines (26 loc) · 1.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import random
import time
import pickle
import numpy as np
import matplotlib.pyplot as plt
def get_time_lag(df):
    """
    Add a ``time_lag`` column to ``df`` and return the per-user tracking state.

    Rows are walked in their existing order. For each user the lag is the
    difference between the current ``timestamp`` and the timestamp recorded
    at that user's previous container change. Consecutive rows of a user that
    share a ``task_container_id`` reuse the lag already computed for that
    container, and a user's first row gets a lag of 0.

    The raw differences are divided by 1000 and then 60 (i.e. converted to
    minutes, assuming ``timestamp`` is in milliseconds -- TODO confirm) and
    clipped to the range [0, 1440] (1440 minutes == one day).

    ``df`` is modified in place. The returned dict maps each ``user_id`` to
    ``[last_timestamp, last_task_container_id, last_lag]`` where ``last_lag``
    is the unconverted, unclipped difference.
    """
    # user_id -> [last_timestamp, last_task_container_id, last_lag]
    last_seen = {}
    raw_lags = np.zeros(len(df), dtype=np.float32)
    records = df[["user_id", "timestamp", "task_container_id"]].values

    for pos, (user, stamp, container) in enumerate(records):
        state = last_seen.get(user)

        if state is None:
            # First row for this user: the lag stays at its initial 0.
            last_seen[user] = [stamp, container, 0]
            continue

        if container == state[1]:
            # Same container as the previous row: reuse that container's lag.
            raw_lags[pos] = state[2]
            continue

        # New container: measure the gap, then remember this row as the
        # reference point. The lag is read back from the float32 buffer so
        # the stored value matches what was written to the array.
        raw_lags[pos] = stamp - state[0]
        state[0] = stamp
        state[1] = container
        state[2] = raw_lags[pos]

    # Convert to minutes.
    df["time_lag"] = raw_lags / 1000 / 60
    # Cap at 1440 minutes (one day) and floor at 0.
    df["time_lag"] = df["time_lag"].clip(lower=0, upper=1440)
    return last_seen