# Day_26_01_PokerCompetition.py
# 99_Poker.py
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn import preprocessing, model_selection
from operator import itemgetter
# Requires the poker-hand dataset to be added before this script can run.
# Goal: raise the accuracy.
# Step 1. Scaling
def apply_scaling(x):
    """Return ``x`` standardized with scikit-learn's ``preprocessing.scale``.

    The call uses the library defaults; the input is passed through as-is.
    """
    scaled = preprocessing.scale(x)
    return scaled
# Step 2. One-hot vectors
def apply_onehot(x):
    """One-hot encode every column of ``x`` and join the results side by side.

    Each column is binarized independently with a ``LabelBinarizer`` that is
    re-fitted per column, and the per-column encodings are stacked
    horizontally in the original column order.

    For the poker data this yields 85 columns:
      suit: 4 values  * 5 cards = 20
      rank: 13 values * 5 cards = 65
    """
    binarizer = preprocessing.LabelBinarizer()

    encoded_columns = []
    for col in range(x.shape[1]):
        # fit_transform re-learns the label set for this column only.
        encoded_columns.append(binarizer.fit_transform(x[:, col]))

    # Concatenate all per-column encodings along the feature axis.
    return np.hstack(encoded_columns)
# Step 3. Sorting (keeps the original suit/rank interleaving)
def apply_sort_1(x):
    """Sort the five cards of every hand by rank, breaking ties by suit.

    Each row is read as five ``(suit, rank)`` pairs. The pairs are reordered
    by rank first and suit second, then flattened back to the interleaved
    layout, so every rank stays attached to its own suit.

    Example:
        [ 2 10  1  4  3 10  1 10  2 11]
        -> pairs (2,10) (1,4) (3,10) (1,10) (2,11)
        -> [ 1  4  1 10  2 10  3 10  2 11]

    Returns:
        The reordered hands converted with ``np.int32``.
    """
    def by_rank_then_suit(pair):
        # pair is (suit, rank): rank is the primary key, suit the tie-breaker.
        return pair[1], pair[0]

    sorted_hands = []
    for hand in x:
        pairs = np.reshape(hand, [5, 2])
        ordered = sorted(pairs, key=by_rank_then_suit)
        # Flatten the five pairs back into a single row of ten values.
        sorted_hands.append(np.reshape(ordered, (-1,)))

    return np.int32(sorted_hands)
# Step 4. Sorting (ranks first, suits after)
def apply_sort_2(x):
    """Return each hand as its five sorted ranks followed by its five suits.

    Ranks are read from the odd columns (1, 3, 5, 7, 9) and suits from the
    even columns (0, 2, 4, 6, 8). Ranks are sorted ascending within each row;
    suits keep their original order, so the rank/suit pairing is intentionally
    not preserved in the output.

    The input array is left untouched: the rank slice is a view into ``x``,
    so it is sorted with ``np.sort`` (which returns a copy) rather than the
    in-place ``ndarray.sort``, which would silently reorder the caller's data.

    Args:
        x: 2-D array with interleaved ``suit, rank`` columns.

    Returns:
        A new array laid out as ``[sorted ranks..., suits...]`` per row.
    """
    x = np.asarray(x)
    ranks = np.sort(x[:, 1::2], axis=1)   # columns 1, 3, 5, 7, 9 (copied)
    suits = x[:, ::2]                     # columns 0, 2, 4, 6, 8
    return np.hstack([ranks, suits])
# Step 5. Extra features
def apply_features(x):
    """Append two indicator columns, ``flush`` and ``straight``, to each hand.

    Suits are read from columns 0, 2, 4, 6, 8 and ranks from columns
    1, 3, 5, 7, 9.

    * flush    -- all five suits are equal.
    * straight -- the five sorted ranks are consecutive, or form the
      ace-high run 10-J-Q-K-A. The ace is encoded as rank 1, so that run
      sorts to ``[1, 10, 11, 12, 13]`` and would be missed by a plain
      "consecutive" check.

    Args:
        x: 2-D array of hands with interleaved ``suit, rank`` columns.

    Returns:
        ``x`` with the two indicator columns appended on the right
        (booleans are promoted to the dtype of ``x`` by the stacking).
    """
    x = np.asarray(x)
    suits = x[:, 0:10:2]
    ranks = np.sort(x[:, 1:10:2], axis=1)

    flush = (suits == suits[:, :1]).all(axis=1)

    # Sorted ranks are ascending, so each adjacent gap must be exactly 1.
    consecutive = (ranks[:, 1:] - ranks[:, :-1] == 1).all(axis=1)
    # Ace (1) may also sit above the king: 10, J, Q, K, A.
    ace_high = (ranks == [1, 10, 11, 12, 13]).all(axis=1)
    straight = consecutive | ace_high

    return np.hstack([x, np.transpose([flush, straight])])
# Load the training data; the first CSV column is used as the row index.
poker = pd.read_csv('data/poker-train.csv', index_col=0)

# Every column except the last is a feature; the last column is the label.
x = poker.values[:, :-1]
y = poker.values[:, -1]

# Preprocessing experiments: uncomment the step(s) to apply.
# x = apply_sort_1(x)
# x = apply_sort_2(x)
# x = apply_features(x)
# print(x.shape)    # (25010, 12)
# exit(-1)
# x = apply_onehot(x)
# x = apply_scaling(x)
# print(x.shape)    # (25010, 85)

# Hold out 20% of the rows for validation.
x_train, x_valid, y_train, y_valid = model_selection.train_test_split(x, y, train_size=0.8)

# Two hidden ReLU layers followed by a 10-way softmax output.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(128, activation='relu'))
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dense(10, activation='softmax'))

# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# and rejected by current Keras releases.
# Labels are passed as integer class ids, hence the sparse loss.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
              loss=tf.keras.losses.sparse_categorical_crossentropy,
              metrics=['acc'])

model.fit(x_train, y_train,
          epochs=100, batch_size=100, verbose=2,
          validation_data=(x_valid, y_valid))

# Recorded results:
# baseline : val_loss: 0.6503 - val_acc: 0.7359  (loss fluctuates a lot)
# scale    : val_loss: 0.0299 - val_acc: 0.9972
# one_hot  : val_loss: 0.1645 - val_acc: 0.9910
# sort1    : val_loss: 0.0229 - val_acc: 0.9944  (occasionally spikes -> scaling fixes it)
# sort2    : val_loss: 0.0294 - val_acc: 0.9950
# feature  : val_loss: 0.3692 - val_acc: 0.8948  (apparently not a great feature; try adding others)