인공지능

딥러닝 학습 시 예측 정확도를 높이는 여러 가지 방법들

쿠와와 2020. 12. 9. 19:45
# Day_26_01_PokerCompetition.py
# 99_Poker.py
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn import preprocessing, model_selection
from operator import itemgetter
# poker-hand 데이터셋을 추가해주면 동작 가능


# acc 올리기
# 1번. 스케일링
def apply_scaling(x):
    """Return a standardized copy of the feature matrix.

    Delegates to ``sklearn.preprocessing.scale`` with its default
    arguments.

    Args:
        x: Array-like feature matrix accepted by ``preprocessing.scale``.

    Returns:
        The scaled array produced by ``preprocessing.scale``.
    """
    standardized = preprocessing.scale(x)
    return standardized


# 2번. 원핫 벡터
def apply_onehot(x):
    """One-hot encode every column of ``x`` and join the results side by side.

    Each column is binarized independently with a ``LabelBinarizer`` that is
    re-fitted per column, so the width contributed by a column depends on the
    distinct values found in it.

    Author's note on the expected width for the poker data:
    suit 4 values * 5 cards = 20, rank 13 values * 5 cards = 65.

    Args:
        x: 2-D array supporting ``x.shape`` and ``x[:, i]`` column slicing.

    Returns:
        The horizontally stacked encodings of all columns.
    """
    binarizer = preprocessing.LabelBinarizer()

    encoded_columns = []
    for col in range(x.shape[1]):
        # fit_transform re-fits the binarizer on this column only.
        encoded_columns.append(binarizer.fit_transform(x[:, col]))

    # Concatenate the per-column encodings along the feature axis.
    return np.hstack(encoded_columns)


# 3번. 정렬(원본 순서 유지)
def apply_sort_1(x):
    """Sort the five cards of every hand while keeping each card's pair intact.

    Every row is viewed as five (first, second) pairs. Pairs are ordered by
    their second value, ties broken by their first value, and the row is
    flattened back to its original length.

    Example for one row:
        [ 2 10  1  4  3 10  1 10  2 11]
        -> [ 1  4  1 10  2 10  3 10  2 11]

    Args:
        x: Iterable of rows, each holding 10 values.

    Returns:
        ``np.int32`` array with one re-ordered row per input row.
    """
    flattened = []
    for row in x:
        # View the flat row as a (5, 2) table: one card per line.
        hand = np.reshape(row, [5, 2])
        # Primary key: second value of the pair; secondary key: first value.
        ordered = sorted(hand, key=lambda card: (card[1], card[0]))
        # Back to a flat 1-D row.
        flattened.append(np.reshape(ordered, (-1,)))
    return np.int32(flattened)


# 4번. 정렬(숫자를 앞에, 무늬를 뒤에)
def apply_sort_2(x):
    """Put the sorted odd-index columns first and the even-index columns last.

    The odd-index columns (1, 3, 5, 7, 9) are sorted ascending within each
    row; the even-index columns (0, 2, 4, 6, 8) keep their original order.
    Because only one half is sorted, a value in the first half no longer
    lines up with the value that preceded it in the input row.

    The input array is left untouched: ``x[:, 1::2]`` is a view of ``x``, so
    sorting it in place would silently reorder the caller's data.
    ``np.sort`` returns a sorted copy instead.

    Args:
        x: 2-D NumPy array of hands.

    Returns:
        New array ``[sorted odd-index columns | even-index columns]``.
    """
    cards = np.sort(x[:, 1::2], axis=-1)  # sorted copy, per row
    suits = x[:, ::2]                     # columns 0, 2, 4, 6, 8
    return np.hstack([cards, suits])


# 5번 feature 추가
def apply_features(x):
    """Append two boolean features to every hand: same-suit and straight.

    Rows are read as five (suit, rank) pairs: suits at indices 0, 2, 4, 6, 8
    and ranks at indices 1, 3, 5, 7, 9.

    * same-suit: all five suit values are equal.
    * straight: the five sorted ranks are consecutive, or they form the
      ace-high run 10-J-Q-K-A. With Ace encoded as rank 1 (the poker-hand
      dataset convention) that run sorts to [1, 10, 11, 12, 13] and fails a
      plain consecutiveness check, so it is tested explicitly.

    Args:
        x: 2-D array of hands with 10 columns.

    Returns:
        ``x`` with two extra columns appended (same-suit, straight); the
        booleans take on the dtype chosen by ``np.hstack``.
    """
    ace_high = [1, 10, 11, 12, 13]

    flushes = []
    straights = []
    for row in x:
        # Same suit on all five cards.
        flushes.append(len(set(row[0:10:2])) == 1)

        # Plain ints avoid wrap-around when subtracting unsigned values.
        ranks = sorted(int(rank) for rank in row[1:10:2])
        consecutive = all(b - a == 1 for a, b in zip(ranks, ranks[1:]))
        straights.append(consecutive or ranks == ace_high)

    return np.hstack([x, np.transpose([flushes, straights])])


# Load the training data; the first CSV column is used as the row index.
poker = pd.read_csv('data/poker-train.csv', index_col=0)

# Every column except the last is a feature; the last column is the label.
x = poker.values[:, :-1]
y = poker.values[:, -1]

# Apply: uncomment the preprocessing step(s) to experiment with.
# x = apply_sort_1(x)
# x = apply_sort_2(x)
# x = apply_features(x)
# print(x.shape)          # (25010, 12)
# exit(-1)
# x = apply_onehot(x)
# x = apply_scaling(x)
# print(x.shape)          # (25010, 85)

# Hold out 20% of the rows for validation.
x_train, x_valid, y_train, y_valid = model_selection.train_test_split(x, y, train_size=0.8)

# Small fully connected classifier with a 10-way softmax output.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(128, activation='relu'))
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dense(10, activation='softmax'))

# `learning_rate` is the supported argument name; the old `lr` alias is
# deprecated and rejected by newer Keras releases.
# The sparse loss takes integer class labels directly.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
              loss=tf.keras.losses.sparse_categorical_crossentropy,
              metrics=['acc'])

model.fit(x_train, y_train,
          epochs=100, batch_size=100, verbose=2,
          validation_data=(x_valid, y_valid))

# Results recorded by the original author for each preprocessing variant:
# baseline  : val_loss: 0.6503 - val_acc: 0.7359  loss fluctuates up and down
# scale     : val_loss: 0.0299 - val_acc: 0.9972
# one_hot   : val_loss: 0.1645 - val_acc: 0.9910
# sort1     : val_loss: 0.0229 - val_acc: 0.9944  occasional spikes -> scaling fixes it
# sort2     : val_loss: 0.0294 - val_acc: 0.9950
# feature   : val_loss: 0.3692 - val_acc: 0.8948  apparently not a great feature; try adding others

'인공지능' 카테고리의 다른 글

TensorFlow Hub -> fine-tune 사용하기  (0) 2020.12.20
TensorFlow Hub 사용하기  (0) 2020.12.20
TFDS (TensorFlow Datasets) 사용하기  (0) 2020.12.20
Keras functional  (0) 2020.12.10