Unverified Commit e2fd3fa9 authored by Starfleet-Command's avatar Starfleet-Command Committed by GitHub
Browse files

Files de regresion con python

parent da4f9d8e
SKU,NOMBRE,FAMILIA
21,JAMON AMERICANO 170GR FUD,JAMÓN
214,JAMON CHIMEX 396 GRS,JAMÓN
185,JAMON COCIDO AHUMADO 340 GRS OMA,JAMÓN
384,JAMON DE PAVO VIRGINIA 450GR FUD,JAMÓN
640,JAMON DE PAVO VIRGINIA CUIDATE 290G,JAMÓN
323,JAMON DE PIERNA BALANCE SRF 250G,JAMÓN
172,JAMON PAVO 250 GRS CHX,JAMÓN
489,JAMON REAL 300G SAN RAFAEL,JAMÓN
30,JAMON REAL DE PAVO 300G SAN RAFAEL,JAMÓN
180,JAMON SERRANO 100G SRF,JAMÓN
5196,JAMÓN SERRANO ESPAÑOL 100G TGM,JAMÓN
474,JAMON VIRGINIA 290GR FUD,JAMÓN
583,JAMON VIRGINIA DE PAVO 290GR FUD,JAMÓN
492,SNAX JAMON FUD,JAMÓN
159,PECHUGA DE PAVO BALANCE 250 GR,PECHUGA DE PAVO
5155,PECHUGA DE PAVO REB 250G TGM,PECHUGA DE PAVO
12,PECHUGA DE PAVO SAN RAFAEL,PECHUGA DE PAVO
675,PECHUGA DE PAVO SRF BALANCE GRUESA,PECHUGA DE PAVO
116,PECHUGA DE PAVO VIRGINIA 250GR FUD,PECHUGA DE PAVO
638,PECHUGA DE PAVO VIRGINIA CUIDATE 250G,PECHUGA DE PAVO
878,SALCHICHA BOTANERA DE PAVO 500G SRF,SALCHICHA
372,SALCHICHA CLASICA 454 GRS OMA,SALCHICHA
360,SALCHICHA COCKTAIL DE PAVO FUD,SALCHICHA
909,SALCHICHA DE PAVO 500G BALANCE SRF,SALCHICHA
990,SALCHICHA DE PAVO 750 GR SAN RAFAEL,SALCHICHA
227,SALCHICHA DE PAVO BALANCE SRF 750 GR,SALCHICHA
645,SALCHICHA DE PAVO CUIDATE 1KG,SALCHICHA
644,SALCHICHA DE PAVO CUIDATE 500G,SALCHICHA
1012,SALCHICHA DE PAVO CUIDATE ROLL UP 750 gr,SALCHICHA
890,SALCHICHA HOT DOG 1KG FUD,SALCHICHA
791,SALCHICHA HOT DOG 500G FUD,SALCHICHA
983,SALCHICHA HOTDOG PAVO 500 G FUD,SALCHICHA
380,SALCHICHA JUMBO ORIGINAL 454 GRS OMA,SALCHICHA
1001,SALCHICHA P/ ASAR ALEMANA 800 GRS CHX,SALCHICHA
307,SALCHICHA PARA ASAR 800 GRS CHX,SALCHICHA
405,SALCHICHA PARA ASAR QUESO 800 GRS CHX,SALCHICHA
892,SALCHICHA PAVO 1KG SRF,SALCHICHA
374,SALCHICHA PAVO 454 GRS OMA,SALCHICHA
341,SALCHICHA PAVO 500 GR,SALCHICHA
294,SALCHICHA PAVO 500 GRS SRF,SALCHICHA
993,SALCHICHA PAVO 640 GRS CHX,SALCHICHA
345,SALCHICHA PECHUGA DE PAVO BALANCE 500GR,SALCHICHA
773,SALCHICHA PECHUGA PAVO 500 GRS SRF,SALCHICHA
557,SALCHICHA POLACA QUESO/JALAPEÑO CHX,SALCHICHA
889,SALCHICHA TURKEY LINE 1KG FUD,SALCHICHA
975,SALCHICHA VIENA 266G FUD,SALCHICHA
461,SALCHICHA VIENA 550 GRS SRF,SALCHICHA
326,SALCHICHA VIENA GRANEL CHX,SALCHICHA
495,SNAX SALCHICHA FUD,SALCHICHA
import pandas as pd
from sklearn import linear_model
from sklearn.metrics import mean_squared_error
import numpy as np
from multiprocessing import Process, Queue
import math
def getSmape(actual, predicted):
    """Return the symmetric MAPE of `predicted` vs `actual` (range [0, 2])."""
    eps = 1e-10  # keeps the denominator non-zero when both values are 0
    abs_err = np.abs(actual - predicted)
    scale = np.abs(actual) + np.abs(predicted) + eps
    return np.mean(2.0 * abs_err / scale)
def linear_reg(X, Y):
    """Fit an ordinary-least-squares model on (X, Y) and return the fitted model."""
    reg = linear_model.LinearRegression(fit_intercept=True)
    # LinearRegression.fit returns the estimator itself, so fit-and-return chains.
    return reg.fit(X, Y)
def predict(model, X):
    """Return the fitted model's predictions for the feature matrix X."""
    predictions = model.predict(X)
    return predictions
def loopPreds(regs, data, progress_counter, features, concat_queue, smape_queue):
    """Fit one linear model per CONCAT in `data` and report SMAPE per CONCAT.

    Designed to run inside a multiprocessing.Process: results go back to the
    parent through `concat_queue` (list of CONCAT ids) and `smape_queue`
    (list of SMAPE scores in the same order).

    NOTE(review): only `features[0]` — the first regression row's feature
    list — is ever used; the per-CONCAT rows of `regs` are not consulted.
    Presumably per-CONCAT feature sets were intended; confirm with the
    author of Regresiones.csv.
    """
    non_feature_cols = ["QTY", "DMDUNIT", "DMDGROUP",
                        "LOC", "STARTDATE", "CONCAT"]
    # Generic column selection replaces the old 4-way if/elif chain, which
    # left X undefined for feature lists longer than 4.
    selected = list(features[0])
    concats = []
    smape = []
    total = len(data.CONCAT.unique())  # hoisted out of the loop
    for concat in data.CONCAT.unique():
        dataSubset = data.loc[data['CONCAT'] == concat]
        concats.append(concat)
        X = dataSubset.drop(columns=non_feature_cols)[selected]
        Y = dataSubset.QTY
        model = linear_reg(X, Y)
        # Local counter only: increments are not visible to other processes.
        progress_counter = progress_counter + 1
        if progress_counter % 100 == 0:
            print("Progress:" + str(progress_counter) + "/" + str(total))
        tempPredict = []
        tempReal = []
        # In-sample evaluation: predict each STARTDATE row with the model
        # fitted on the full CONCAT subset.
        for start in dataSubset.STARTDATE.unique():
            entry = dataSubset.loc[dataSubset["STARTDATE"] == start]
            X_entry = entry.drop(columns=non_feature_cols)[selected]
            tempPredict.append(predict(model, X_entry))
            tempReal.append(entry.QTY)
        concat_smape = getSmape(np.array(tempReal), np.array(tempPredict))
        smape.append(concat_smape)
    concat_queue.put(concats)
    smape_queue.put(smape)
if __name__ == "__main__":
regs = pd.read_csv("D:/DeepStorage/Regresiones.csv")
data = pd.read_csv("D:/DeepStorage/Fixed_Full_FilteredSalesData.csv")
data = data.dropna()
concats = []
smape = []
process_list = []
progress_counter = 0
features = regs["features"].apply(eval)
processes = 8
unique_concats = data.CONCAT.unique()
chunk = math.floor(len(unique_concats)/processes)
concat_queue = Queue()
smape_queue = Queue()
for i in range(0, processes):
conditions = data['CONCAT'].isin(unique_concats[i*chunk:(i+1)*chunk])
data_subset = data.loc[conditions]
if i != processes:
p = Process(target=loopPreds, args=(
regs, data_subset, progress_counter, features, concat_queue, smape_queue))
else:
conditions = data['CONCAT'].isin(
unique_concats[i*chunk:i*chunk + len(unique_concats)-(chunk*processes)])
data_subset = data.loc[conditions]
p = Process(target=loopPreds, args=(
regs, data_subset, progress_counter, features, concat_queue, smape_queue))
process_list.append(p)
p.start()
for i in range(len(process_list)):
concats.extend(concat_queue.get())
smape.extend(smape_queue.get())
for p in process_list:
p.join()
dictio = {"CONCAT": concats, "SMAPE": smape}
regInfo = pd.DataFrame(dictio)
print(regInfo)
regInfo.to_csv("D:/DeepStorage/RealvsPredict.csv", index=False)
pass
import pandas as pd
from sklearn import linear_model
from sklearn.metrics import mean_squared_error
import numpy as np
import itertools
def fit_linear_reg(X, Y):
    """Fit OLS on (X, Y); return (RSS, R-squared) of the in-sample fit."""
    reg = linear_model.LinearRegression(fit_intercept=True)
    reg.fit(X, Y)
    # RSS = MSE * n, since mean_squared_error averages over the n samples.
    rss = mean_squared_error(Y, reg.predict(X)) * len(Y)
    r_squared = reg.score(X, Y)
    return rss, r_squared
def best_subset(data):
    """Greedy forward feature selection (up to 4 features) for one CONCAT.

    Repeatedly adds the single feature that most reduces RSS, then returns
    (feature_list, r_squared) for the subset size with the highest R^2.
    Falls back to the full 4-feature model when no finite R^2 beat 0.
    """
    Y = data.QTY
    X = data.drop(columns=["QTY", "DMDUNIT", "DMDGROUP",
                           "LOC", "STARTDATE", "CONCAT"], axis=1)
    k = 4
    remaining_features = list(X.columns.values)
    features = []
    # Index 0 is an inf placeholder so list position i holds the i-feature model.
    RSS_list, R_squared_list = [np.inf], [np.inf]
    features_list = dict()
    for i in range(1, k + 1):
        best_RSS = np.inf
        best_R_squared = 0.0   # guard: defined even if no combo beats inf
        best_feature = None
        # Try each remaining feature appended to the current subset and keep
        # the one with the lowest residual sum of squares.
        for combo in itertools.combinations(remaining_features, 1):
            RSS, R_squared = fit_linear_reg(X[list(combo) + features], Y)
            if RSS < best_RSS:
                best_RSS = RSS
                best_R_squared = R_squared
                best_feature = combo[0]
        features.append(best_feature)
        remaining_features.remove(best_feature)
        RSS_list.append(best_RSS)
        R_squared_list.append(best_R_squared)
        features_list[i] = features.copy()
    # Pick the subset size with the highest finite R^2. BUGFIX: np.Inf was
    # removed in NumPy 2.0 — use the canonical np.inf spelling instead.
    best_r_index = 0
    best_r = 0
    for i, r in enumerate(R_squared_list):
        if r > best_r and r != np.inf:
            best_r = r
            best_r_index = i
    if best_r_index == 0:
        return features_list[k], R_squared_list[k]
    else:
        return features_list[best_r_index], R_squared_list[best_r_index]
if __name__ == "__main__":
data = pd.read_csv("D:/DeepStorage/Fixed_Full_FilteredSalesData.csv")
data = data.dropna()
print("data read")
concats = []
features = []
r_squared = []
progress_counter = 0
for i in data.CONCAT.unique():
dataSubset = data.loc[data['CONCAT'] == i]
temp_features, temp_Rsquared = best_subset(dataSubset)
concats.append(i)
features.append(temp_features)
r_squared.append(temp_Rsquared)
progress_counter = progress_counter+1
if(progress_counter % 250 == 0):
print("Progress:" + str(progress_counter) +
"/"+str(len(data.CONCAT.unique())))
dictio = {"CONCAT": concats, "features": features, "r_squared": r_squared}
regInfo = pd.DataFrame(dictio)
print(regInfo)
regInfo.to_csv("D:/DeepStorage/Regresiones.csv", index=False)
pass
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment