본문 바로가기

인공지능/딥러닝

[딥러닝DL] DNN 분류 데이터 사용

~ 목차 ~

< 와인종류 분류하기 >

- 입력계층의 출력크기 64
- 은닉계층의 출력크기 32
- 나머지는?
- 콜백함수 모두 적용
- 옵티마이저 모두 적용 후 가장 좋은 성능일 때 옵티마이저 확인해 보기
- 정밀도, 재현율, f1-score, confusion_matrix 출력

 

 

실습해보기 - 내코드 🐥

 


import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras

import matplotlib.pyplot as plt
import seaborn as sns

"""한글처리"""
# Use a Korean-capable font so Hangul text in titles and labels renders
plt.rc("font", family="Malgun Gothic")
# Draw minus signs with the ASCII hyphen. True is matplotlib's default and
# keeps the Unicode minus (U+2212), which may be missing from the Korean font
# and then shows up as an empty box on negative tick labels.
plt.rcParams["axes.unicode_minus"] = False

from sklearn.model_selection import train_test_split

from sklearn.preprocessing import StandardScaler



"""정확도 : 훈련 및 검증 정확도에서 사용한 score()함수와 동일함"""
from sklearn.metrics import accuracy_score
"""정밀도"""
from sklearn.metrics import precision_score
"""재현율"""
from sklearn.metrics import recall_score
"""f1-score"""
from sklearn.metrics import f1_score



import numpy as np
"""오차행렬 계산 라이브러리"""
from sklearn.metrics import confusion_matrix
"""오차행렬도 시각화 라이브러리"""
from sklearn.metrics import ConfusionMatrixDisplay

"""실행 결과를 동일하게 하기 위한 처리(완전 동일하지 않을 수도 있음)"""
# Fix the global random seed so reruns give (nearly) the same results
tf.keras.utils.set_random_seed(seed=42)

# Ask TensorFlow to run its ops deterministically
tf.config.experimental.enable_op_determinism()

 


"""데이터 불러오기"""
# Read the wine dataset from disk and display it
data = pd.read_csv(filepath_or_buffer="./data/08_wine.csv")
data

 

 


"""데이터 추출하기"""
# Target: the "class" column; features: every column except the last one
y = data["class"]
X = data.iloc[:, :-1]
X.shape, y.shape

 

((6497, 3), (6497,))

 

 


"""데이터 정규화"""
# Standardize the features, then display the resulting shape and array
ss = StandardScaler()
ss.fit(X)
X_scaler = ss.transform(X)
X_scaler.shape, X_scaler

((6497, 3),
 array([[-0.91546416, -0.7447781 ,  1.81308951],
        [-0.58006813, -0.59764007, -0.11507303],
        [-0.58006813, -0.66069923,  0.25811972],
        ...,
        [-0.91546416, -0.89191614, -1.42124765],
        [ 1.9354021 , -0.91293585,  0.75571005],
        [ 1.09691202, -0.97599501,  0.25811972]]))

 

 


"""데이터 분류하기"""
# Split 60 / 20 / 20 into train / validation / test.
# The raw features are split first and the scaler is fitted on the training
# rows only, so validation and test statistics never leak into preprocessing.
# The row count and random_state are the same as when splitting the scaled
# array, so each subset receives the same rows as before.
raw_train_X, raw_temp_X, train_y, temp_y = train_test_split(X, y, test_size=0.4, random_state=42)
raw_val_X, raw_test_X, val_y, test_y = train_test_split(raw_temp_X, temp_y, test_size=0.5, random_state=42)

split_scaler = StandardScaler().fit(raw_train_X)
train_X = split_scaler.transform(raw_train_X)
val_X = split_scaler.transform(raw_val_X)
test_X = split_scaler.transform(raw_test_X)

# Each line is labelled with the subset it actually reports
print(f"train_input={train_X.shape} / train_target={train_y.shape}")
print(f"val_input={val_X.shape} / val_target={val_y.shape}")
print(f"test_input={test_X.shape} / test_target={test_y.shape}")

train_input=(3898, 3) / train_target=(3898,)
test_input=(1299, 3) / test_target=(1299,)
test_input=(1300, 3) / test_target=(1300,)

 

 


"""모델 생성하기"""
# Start from an empty sequential model; layers are added in the next cell
model = keras.models.Sequential()
model

<keras.engine.sequential.Sequential at 0x2e0fd5d7a90>

 

 


"""계층 생성하기"""
# Network: 3 input features -> 64 units -> 32 units -> 1 output unit,
# with sigmoid activations on every layer
layer_stack = [
    keras.layers.Dense(64, activation="sigmoid", input_shape=(3, ), name="input_layer"),
    keras.layers.Dense(32, activation="sigmoid", name="hidden_layer"),
    keras.layers.Dense(1, activation="sigmoid", name="output_layer"),
]
for layer in layer_stack:
    model.add(layer)

model.summary()

 

 


""" 옵티마이저 모두 적용 후 가장 좋은 성능일 때 옵티마이저 확인해 보기 """

""" 옵티마이저를 리스트로 정의하기 """
# Train the same network once per optimizer and report which optimizer gives
# the highest validation accuracy and which gives the lowest validation loss.
optimizers = ["sgd", "adagrad", "rmsprop", "adam"]

# Snapshot of the untrained weights. Every optimizer must start from this same
# state; otherwise each run keeps training the weights left behind by the
# previous optimizer and the comparison is meaningless.
initial_weights = model.get_weights()

# Best validation accuracy so far, the optimizer that reached it, and the
# weights the model had at that point
best_acc = 0.0
best_acc_opt = ""
best_acc_weights = initial_weights

# Lowest validation loss so far and the optimizer that reached it.
# Start from +inf: binary cross-entropy is not bounded above by 1.0, so a
# starting value of 1.0 could leave best_loss_opt empty.
best_loss = float("inf")
best_loss_opt = ""

for opt in optimizers:
    print(f"-------------------------{opt}---------------------------")

    # Reset to the untrained state; compiling with the optimizer's name gives
    # this run its own optimizer
    model.set_weights(initial_weights)
    model.compile(optimizer=opt, loss="binary_crossentropy", metrics=["accuracy"])

    # Checkpoint the best epoch of this run.
    # NOTE(review): all runs write to the same file, so it ends up holding the
    # last optimizer's best epoch, not the overall best - confirm this is intended.
    checkpoint_cb = keras.callbacks.ModelCheckpoint("./model/best_model.h5", save_best_only=True)

    # Stop once the monitored metric has not improved for 2 epochs and roll
    # the model back to its best weights
    early_stopping_cb = keras.callbacks.EarlyStopping(patience=2, restore_best_weights=True)

    history = model.fit(
        train_X,
        train_y,
        epochs=100,
        verbose=1,
        validation_data=(val_X, val_y),
        callbacks=[checkpoint_cb, early_stopping_cb],
    )

    # val == [loss, accuracy] measured on the validation set
    val = model.evaluate(val_X, val_y)

    # Track the highest accuracy, its optimizer and the matching weights
    if best_acc < val[1]:
        best_acc = val[1]
        best_acc_opt = opt
        best_acc_weights = model.get_weights()

    # Track the lowest loss and its optimizer
    if best_loss > val[0]:
        best_loss = val[0]
        best_loss_opt = opt

    print()

# Leave `model` holding the weights of the most accurate run, so the
# prediction cell that follows evaluates the best optimizer's result
model.set_weights(best_acc_weights)

print("전체 실행 종료 >>>>>>>>>>")
print(f"최고정확도 옵티마이저 : {best_acc_opt} / 최고정확도 : {best_acc} ")
print(f"최저손실율 옵티마이저 : {best_loss_opt} / 최저손실율 : {best_loss} ")

 

 

 


""" 예측하기 """

# Predicted probabilities for the test set
y_pred = model.predict(test_X)

# Convert both the targets and the probabilities to 0/1 labels at a 0.5 cut-off
threshold = 0.5
binary_test_y, binary_y_pred = (
    np.where(values >= threshold, 1, 0) for values in (test_y, y_pred)
)

# Precision, recall and F1-score, computed with the same pair of label arrays
pre, rec, f1 = (
    score(binary_test_y, binary_y_pred)
    for score in (precision_score, recall_score, f1_score)
)

# Confusion matrix (rows: actual class, columns: predicted class)
conf = confusion_matrix(binary_test_y, binary_y_pred)

print(f"정밀도 : {pre}, 재현율 : {rec}, f1 : {f1}, 오차행렬 : {conf}")

정밀도 : 0.8100664767331434, 재현율 : 0.8978947368421053, f1 : 0.8517224163754368,
오차행렬 : [[150 200]
                   [ 97 853]]

 

 


"""오차 행렬 시각화"""
# Draw the confusion matrix as a heatmap with integer counts in each cell
ax = sns.heatmap(conf, annot=True, fmt="d")
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
plt.show()

 

 

 


 

 

실습해보기 - 강사님  🐥

 


"""사용할 라이브러리"""
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix

 


"""데이터 불러들이기"""
# Read the wine dataset and peek at its first row
data = pd.read_csv(filepath_or_buffer="./data/08_wine.csv")
data.head(n=1)

 


"""독립변수와 종속변수로 분류하기"""
# Dependent variable: the "class" column; independent variables: every column
# except the last one
y = data["class"]
X = data.iloc[:, :-1]

X.shape, y.shape

((6497, 3), (6497,))

 

 


# Display the feature frame and the target series to inspect their contents
X, y

(      alcohol  sugar    pH
 0         9.4    1.9  3.51
 1         9.8    2.6  3.20
 2         9.8    2.3  3.26
 3         9.8    1.9  3.16
 4         9.4    1.9  3.51
 ...       ...    ...   ...
 6492     11.2    1.6  3.27
 6493      9.6    8.0  3.15
 6494      9.4    1.2  2.99
 6495     12.8    1.1  3.34
 6496     11.8    0.8  3.26
 
 [6497 rows x 3 columns],
 0       0.0
 1       0.0
 2       0.0
 3       0.0
 4       0.0
        ... 
 6492    1.0
 6493    1.0
 6494    1.0
 6495    1.0
 6496    1.0
 Name: class, Length: 6497, dtype: float64)

 


"""데이터 스케일링(정규화, 표준화)"""
# Fit a standard scaler on the features and transform them; fit() returns the
# scaler itself, so construction and fitting can be chained
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)
X_scaled.shape

(6497, 3)

 

<데이터 분류>
- 2단계(훈련:테스트 = 7:3, 7.5:2.5를 많이 사용) : 데이터 양이 많지 않을 때 사용
- 3단계(훈련:검증:테스트 = 6:2:2, 5:3:2를 많이 사용) : 검증 데이터로 튜닝과 성능 검증을 진행
       : 테스트 데이터는 검증이 끝난 뒤 마지막에 예측하여 최종 성능을 평가함.

 

 


""" 훈련 : 검증 : 테스트 = 6 : 2 : 2로 분리하기 """
# Split train : validation : test = 6 : 2 : 2.
# The raw features are split first and the scaler is fitted on the training
# rows only, so validation and test statistics never leak into preprocessing.
# The row count and random_state are the same as when splitting the scaled
# array, so each subset receives the same rows as before.
X_train_raw, X_temp_raw, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)

X_val_raw, X_test_raw, y_val, y_test = train_test_split(X_temp_raw, y_temp, test_size=0.5, random_state=42)

split_scaler = StandardScaler().fit(X_train_raw)
X_train = split_scaler.transform(X_train_raw)
X_val = split_scaler.transform(X_val_raw)
X_test = split_scaler.transform(X_test_raw)

X_train.shape, y_train.shape, X_val.shape, y_val.shape, X_test.shape, y_test.shape

((3898, 3), (3898,), (1299, 3), (1299,), (1300, 3), (1300,))

 

 


"""모델 생성"""
# Sequential and Dense are imported at the top of this section
# (tensorflow.keras.models / tensorflow.keras.layers).
# Start from an empty sequential model and display it.
model = Sequential()
model

<keras.engine.sequential.Sequential at 0x1b25ddaf100>

 

 


"""입력계층
 - input_dim=3 : 입력 특성의 갯수 (input_shape 대신에 사용가능)
"""
# Network: 3 input features -> 64 (relu) -> 32 (relu) -> 1 (sigmoid).
# Author's note: a Dropout layer can also be inserted before a hidden layer
# when tuning away overfitting.
layer_stack = [
    Dense(64, activation="relu", input_dim=3),  # input layer
    Dense(32, activation="relu"),               # hidden layer
    Dense(1, activation="sigmoid"),             # output layer
]
for layer in layer_stack:
    model.add(layer)

 

 


# Print a summary of the model
model.summary()

 


"""모델 설정하기"""
# Binary classification setup: Adam optimizer, binary cross-entropy loss,
# accuracy reported as the metric
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

 

 


"""모델 훈련시키기"""
# Train for 100 epochs, evaluating on the validation split after each epoch
model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    epochs=100,
)

 


"""성능 검증"""
# Evaluate on the test split; the result unpacks into loss and accuracy
loss, acc = model.evaluate(x=X_test, y=y_test)
loss, acc
# Author's note: scores rank train > validation > test, which the author
# reads as a good result

(0.3532726466655731, 0.8500000238418579)

 


"""예측하기"""
# Predict on the test split and display the raw model outputs
y_pred = model.predict(x=X_test)
y_pred

array([[0.9963542 ],
       [0.61787796],
       [0.18863559],
       ...,
       [0.0634653 ],
       [0.06351039],
       [0.9970112 ]], dtype=float32)

 


"""평가를 위해서 예측값을 종속변수의 범주 형태(0 or 1)로 변환하기"""
# Cut-off for turning the predicted values into class labels
base = 0.5
# Boolean mask first, then cast: True becomes 1 and False becomes 0
is_positive = y_pred > base
binary_pred = is_positive.astype(int)
binary_pred

array([[1],
       [1],
       [0],
       ...,
       [0],
       [0],
       [1]])

 

 


"""성능 평가"""
# Precision, recall and F1-score of the thresholded predictions on the test split
precision, recall, f1 = (
    metric(y_test, binary_pred)
    for metric in (precision_score, recall_score, f1_score)
)

precision, recall, f1

(0.8887744593202883, 0.9084210526315789, 0.8984903695991671)

 

 


"""매트릭스 확인"""
# Confusion matrix (rows: actual class, columns: predicted class)
conf_matrix = confusion_matrix(y_true=y_test, y_pred=binary_pred)
conf_matrix

array([[242, 108],
       [ 87, 863]], dtype=int64)

728x90