#!/usr/bin/env python


import numpy as np
import struct

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.optimizers import Adam
from keras.utils import np_utils


def load_data(filename_images, filename_labels, verbose=True):
    '''
    Load images and labels from binary files

    The image file is read as a 16-byte header of four big-endian unsigned
    32-bit integers (magic, number of images, rows, cols) followed by one
    unsigned byte per pixel. The label file is read as an 8-byte header of
    two big-endian unsigned 32-bit integers (magic, number of labels)
    followed by one unsigned byte per label.

    Args:
        filename_images: Filename of binary file with image data
        filename_labels: Filename of binary file with label data
        verbose: If True, print a one-line summary after loading

    Returns:
        images: float32 array with images of size (num_images,rows,cols,1)
        labels: float32 array with labels of size (num_images)

    Raises:
        ValueError: If the pixel data does not match the dimensions given in
            the image header, or if the number of labels differs from the
            number of images.
    '''

    # Get images
    with open(filename_images, 'rb') as f:
        _magic, num_images, rows, cols = struct.unpack(">IIII", f.read(16))
        images = np.fromfile(f, dtype=np.uint8).reshape(num_images, rows, cols)

    # Get labels. They are stored as unsigned bytes; reading them as signed
    # int8 would turn every value above 127 into a negative number.
    with open(filename_labels, 'rb') as f:
        _magic, _num_labels = struct.unpack(">II", f.read(8))
        labels = np.fromfile(f, dtype=np.uint8)

    # Every image needs exactly one label, otherwise the two arrays cannot
    # be used together for training or evaluation.
    if labels.shape[0] != num_images:
        raise ValueError(
                'Found {} labels in {} but {} images in {}.'.format(
                    labels.shape[0], filename_labels,
                    num_images, filename_images))

    # Convert from uint8 to float32
    images = images.astype('float32')
    labels = labels.astype('float32')

    # Expand dimensions of images because Keras expects images
    # of shape (height, width, channels) but a greyscale image can
    # be represented by a matrix with shape (height, width).
    images = np.expand_dims(images, axis=-1)

    if verbose:
        print('Loaded {} images and labels from files {} and {}.'.format(
                num_images, filename_images, filename_labels))

    return images, labels


# FIXME: Remove this? But they'll find this anyway because it's in the Keras tutorial...
def example_model():
    '''
    Build the example digit classifier that ships with the Keras package.
    According to the original author, this network reaches 99% global
    accuracy on the test dataset within roughly 15 epochs.

    The network consists of two 3x3 convolutions with 32 filters each,
    a 2x2 max pooling, a 128-unit dense layer and a 10-way softmax output,
    with dropout after the pooling and after the dense layer. It is
    returned uncompiled.

    Returns:
        model: Keras model for MNIST image classification
    '''

    # Feature extraction on the (28, 28, 1) input image
    convolutional_part = [
        Conv2D(32, (3, 3), kernel_initializer='glorot_normal',
               input_shape=(28, 28, 1)),
        Activation('relu'),
        Conv2D(32, (3, 3), kernel_initializer='glorot_normal'),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
    ]

    # Classification head producing probabilities for the 10 classes
    dense_part = [
        Flatten(),
        Dense(128, kernel_initializer='glorot_normal'),
        Activation('relu'),
        Dropout(0.5),
        Dense(10, kernel_initializer='glorot_uniform'),
        Activation('softmax'),
    ]

    network = Sequential()
    for layer in convolutional_part + dense_part:
        network.add(layer)

    return network


if __name__ == '__main__':
    # Read the training set from disk
    train_images, train_labels = load_data('train_images.bin', 'train_labels.bin')

    # One-hot encode the integer labels (10 classes)
    train_targets = np_utils.to_categorical(train_labels, 10)

    # Assemble the network: one small convolution, pooling, a 16-unit
    # dense layer with dropout and a 10-way softmax output
    layer_stack = [
        Conv2D(4, (2, 2), kernel_initializer='glorot_normal',
               input_shape=(28, 28, 1)),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Flatten(),
        Dense(16, kernel_initializer='glorot_normal'),
        Activation('relu'),
        Dropout(0.5),
        Dense(10, kernel_initializer='glorot_uniform'),
        Activation('softmax'),
    ]
    model = Sequential()
    for layer in layer_stack:
        model.add(layer)

    # Configure loss function, optimizer algorithm and validation metrics
    compile_options = {
        'loss': 'categorical_crossentropy',
        'optimizer': Adam(),
        'metrics': ['categorical_accuracy'],
    }
    model.compile(**compile_options)

    # Show the layer overview
    model.summary()

    # Fit the model, holding back 25% of the samples for validation
    model.fit(
            train_images,
            train_targets,
            batch_size=128,
            epochs=20,
            validation_split=0.25)

    # Write the trained model to disk
    model.save('model.hd5')