from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/My Drive/Logical-Rhythm-2k20/Sports_Image_Classification
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os, shutil
from keras.applications.resnet50 import ResNet50, preprocess_input
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Dense, Flatten, Dropout, AveragePooling2D, BatchNormalization
from keras.models import Model
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, EarlyStopping
train_labels = pd.read_csv('train_labels.csv')
train_labels.head()
print(train_labels['sports'].value_counts())
sports = set(train_labels['sports'])
# print(sports)
print(len(sports))
# Build bidirectional label <-> index maps.
sports_id = {}
id_sports = {}
for idx, val in enumerate(sports):
    sports_id[val] = idx
    id_sports[idx] = val
# print(id_sports)
# Map each image filename to its sport label.
image_label = {}
for idx in train_labels.index:
    img_no = train_labels['image'][idx].split('/')[-1]
    label = train_labels['sports'][idx]
    image_label[img_no] = label
print(len(image_label))
image_label['1.jpg']
os.listdir()
os.makedirs('Train', exist_ok=True)
os.makedirs('Val', exist_ok=True)
os.listdir()
# Create one sub-directory per class under Train/ and Val/.
for sport in sports:
    p1 = os.path.join('Train', sport)
    p2 = os.path.join('Val', sport)
    if not os.path.isdir(p1):
        os.mkdir(p1)
    if not os.path.isdir(p2):
        os.mkdir(p2)
# os.listdir('Val')
# Move each image from the flat 'train' folder into Train/<sport>/.
base_path = 'train'
print(base_path + train_labels['image'][0][1:])
for idx in train_labels.index:
    src = base_path + train_labels['image'][idx][1:]
    dest = 'Train/' + train_labels['sports'][idx]
    shutil.move(src, dest)
# Hold out the last 10% of each class for validation.
SPLIT = 0.9
for f in os.listdir("Train"):
    path = "Train/" + f
    imgs = os.listdir(path)
    split_size = int(SPLIT * len(imgs))
    files_to_move = imgs[split_size:]
    for img_f in files_to_move:
        src = os.path.join(path, img_f)
        dest = os.path.join("Val/" + f, img_f)
        shutil.move(src, dest)
print("Training Data")
for f in os.listdir("Train"):
path = "Train/"+f
print(f+ " "+str(len(os.listdir(path))))
print("\n Validation Data")
for f in os.listdir("Train"):
path = "Val/"+f
print(f+ " "+str(len(os.listdir(path))))
# print(train_labels['sports'].value_counts())
# Data preprocessing done.
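# Optional sanity check (a quick sketch using the directories and mappings
# created above): the two splits together should account for every labelled image.
total = sum(len(os.listdir(os.path.join(split, sport)))
            for split in ("Train", "Val") for sport in sports)
print("Images after split:", total, "| labelled images:", len(image_label))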
TRAIN_DIR = "Train"
VAL_DIR = "Val"
TEST_DIR = "test"
HEIGHT = 224
WIDTH = 224
BATCH_SIZE = 64
NUM_EPOCHS = 30
NUM_CLASSES = 22
L_R = 0.01
num_train_images = 9925  # images remaining in Train/ after the 90/10 split
base_model = ResNet50(weights='imagenet',
                      include_top=False,
                      input_shape=(HEIGHT, WIDTH, 3))
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=50,
    horizontal_flip=True,
    vertical_flip=True,
    zoom_range=0.3,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    fill_mode='nearest'
)
train_generator = train_datagen.flow_from_directory(TRAIN_DIR,
                                                    target_size=(HEIGHT, WIDTH),
                                                    class_mode='categorical',
                                                    batch_size=BATCH_SIZE)
val_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input
)
# Validation images must not be augmented, so use val_datagen here.
val_generator = val_datagen.flow_from_directory(VAL_DIR,
                                                target_size=(HEIGHT, WIDTH),
                                                batch_size=BATCH_SIZE,
                                                class_mode='categorical',
                                                shuffle=False)
test_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input
)
# class_mode=None: the test set is unlabelled, so yield images only.
test_generator = test_datagen.flow_from_directory(TEST_DIR,
                                                  target_size=(HEIGHT, WIDTH),
                                                  batch_size=BATCH_SIZE,
                                                  class_mode=None,
                                                  shuffle=False)
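# Quick consistency check: flow_from_directory assigns class indices
# alphabetically, so Train/ and Val/ should agree.
assert train_generator.class_indices == val_generator.class_indices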
# def build_finetune_model(base_model, dropout, fc_layers, num_classes):
#     for layer in base_model.layers:
#         layer.trainable = False
#     x = base_model.output
#     x = Flatten()(x)
#     for fc in fc_layers:
#         # New FC layer, random init
#         x = Dense(fc, activation='relu')(x)
#         x = Dropout(dropout)(x)
#     # New softmax layer
#     predictions = Dense(num_classes, activation='softmax')(x)
#     finetune_model = Model(inputs=base_model.input, outputs=predictions)
#     return finetune_model
print(len(base_model.layers))
# Freeze the convolutional base for the first training phase.
for layer in base_model.layers:
    layer.trainable = False
# New classification head on top of the frozen base.
headModel = base_model.output
headModel = AveragePooling2D(pool_size=(7, 7))(headModel)
headModel = Flatten(name="flatten")(headModel)
headModel = Dense(256, activation="relu")(headModel)
headModel = Dropout(0.5)(headModel)
headModel = BatchNormalization()(headModel)
headModel = Dense(NUM_CLASSES, activation="softmax")(headModel)
# FC_LAYERS = [1024, 1024]
# dropout = 0.5
# finetune_model = build_finetune_model(base_model,
#                                       dropout=dropout,
#                                       fc_layers=FC_LAYERS,
#                                       num_classes=22)
model = Model(inputs=base_model.input, outputs=headModel)
adam = Adam(learning_rate=L_R)
model.compile(adam, loss='categorical_crossentropy', metrics=['accuracy'])
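# Optional: review the assembled architecture and trainable-parameter counts.
model.summary()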
model_path = "Model_Weights/ResNet50" + "_model_weights_keras.h5"
checkpoint = ModelCheckpoint(filepath = model_path, monitor="val_accuracy", mode = 'max', save_best_only=True)
earlystop = EarlyStopping(monitor="val_accuracy", patience=8, restore_best_weights=True)
callbacks_list = [checkpoint, earlystop]
history = model.fit(train_generator, epochs=NUM_EPOCHS, workers=8,
                    steps_per_epoch=num_train_images // BATCH_SIZE,
                    validation_data=val_generator,
                    callbacks=callbacks_list)
# Plot the training and validation loss + accuracy
def plot_training(history):
    acc = history.history['accuracy']
    val_acc = history.history['val_accuracy']
    loss = history.history['loss']
    val_loss = history.history['val_loss']
    epochs = range(len(acc))
    plt.plot(epochs, acc, 'bo', label='Training accuracy')
    plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
    plt.title('Training and validation accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.figure()
    plt.plot(epochs, loss, 'bo', label='Training Loss')
    plt.plot(epochs, val_loss, 'b', label='Validation Loss')
    plt.title('Training and validation loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()
plot_training(history)
scores = model.evaluate(val_generator)
print("Validation accuracy =", scores[1])
# Phase 2: unfreeze the top of the network, keeping the first 169 layers frozen.
base_model.trainable = True
for ix in range(169):
    base_model.layers[ix].trainable = False
for layer in base_model.layers:
    if isinstance(layer, BatchNormalization):
        layer.trainable = False
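# BatchNormalization layers stay frozen so they keep their ImageNet running
# statistics; letting them update during fine-tuning on a small dataset
# tends to destabilise training.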
# Recompile with a much smaller learning rate for fine-tuning.
adam = Adam(learning_rate=1e-4)
model.compile(adam, loss='categorical_crossentropy', metrics=['accuracy'])
model_path = "Model_Weights/ResNet50" + "_model_weights_keras_finetuned.h5"
BATCH_SIZE = 32
checkpoint = ModelCheckpoint(filepath = model_path, monitor="val_accuracy", mode = 'max', save_best_only=True)
earlystop = EarlyStopping(monitor="val_accuracy", patience=10, restore_best_weights=True)
callbacks_list = [checkpoint, earlystop]
history = model.fit(train_generator, epochs=NUM_EPOCHS, workers=8,
                    steps_per_epoch=num_train_images // BATCH_SIZE,
                    validation_data=val_generator,
                    callbacks=callbacks_list)
plot_training(history)
scores = model.evaluate(val_generator)
print("Validation accuracy =", scores[1])
pred = model.predict(test_generator, workers=8)
print(pred[0].argmax())
y_classes = pred.argmax(axis=1)
print(y_classes[0])
print(pred.shape)
print(val_generator.class_indices)
inv_map = {v: k for k, v in val_generator.class_indices.items()}
print(inv_map)
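# Mapping prediction indices back to names via val_generator.class_indices is
# valid because flow_from_directory assigns indices alphabetically for every
# split (see the assert after the generators above).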
submission = pd.DataFrame(data={'image': test_generator.filenames, 'sports': y_classes})
submission['sports'] = submission['sports'].map(inv_map)
print(submission.head())
filename = 'Sports_Image_Classification.csv'
submission.to_csv(filename,index=False)
print('Saved file: ' + filename)
Sports_Image_Classification = pd.read_csv('Sports_Image_Classification.csv')
img_pred = {}
for idx in Sports_Image_Classification.index:
    img_no = Sports_Image_Classification['image'][idx].split('/')[-1]
    label = Sports_Image_Classification['sports'][idx]
    img_pred[img_no] = label
print(len(img_pred))
test_images_list = pd.read_csv('test_images_list.csv')
# Re-order the predictions to match the image order expected by the submission.
rows = []
for idx in test_images_list.index:
    img_name = test_images_list['image'][idx]
    img_no = img_name.split('/')[-1]
    rows.append({'image': img_name, 'sports': img_pred[img_no]})
df_pred = pd.DataFrame(rows, columns=['image', 'sports'])
print(df_pred.shape)
print(df_pred.head())
df_pred.to_csv('second_sub.csv', index=False)