#!/usr/bin/env python

import struct

import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.optimizers import Adam
from keras.utils import np_utils


def load_data(filename_images, filename_labels, verbose=True):
    '''
    Load images and labels from binary files

    Both files start with a header of big-endian unsigned 32-bit integers
    (magic number and item count; the image file additionally stores the
    number of rows and columns), followed by one unsigned byte per value.

    Args:
        filename_images: Filename of binary file with image data
        filename_labels: Filename of binary file with label data
        verbose: If True, print a short summary of what was loaded

    Returns:
        images: float32 array with images of shape
            (num_images, rows, cols, 1)
        labels: float32 array with labels of shape (num_images,)

    Raises:
        ValueError: If the number of labels read does not match the number
            of images announced by the image file header.
    '''
    # Get images
    with open(filename_images, 'rb') as f:
        _magic, num_images, rows, cols = struct.unpack(">IIII", f.read(16))
        # Everything after the header is pixel data; reshape fails loudly if
        # the payload size does not match the header.
        images = np.fromfile(f, dtype=np.uint8).reshape(num_images, rows, cols)

    # Get labels
    with open(filename_labels, 'rb') as f:
        _magic, _num_labels = struct.unpack(">II", f.read(8))
        # Read labels as unsigned bytes, like the image data, so that byte
        # values above 127 are not turned into negative numbers.
        labels = np.fromfile(f, dtype=np.uint8)

    # Every image needs exactly one label; report a mismatch here instead of
    # letting it surface later as an obscure error during training.
    if labels.shape[0] != num_images:
        raise ValueError(
            'Found {} labels in {} but {} images in {}.'.format(
                labels.shape[0], filename_labels, num_images,
                filename_images))

    # Convert from uint8 to float32
    images = images.astype('float32')
    labels = labels.astype('float32')

    # Expand dimensions of images because Keras expects images
    # of shape (height, width, channels) but a greyscale image can
    # be represented by a matrix with shape (height, width).
    images = np.expand_dims(images, axis=-1)

    if verbose:
        print('Loaded {} images and labels from files {} and {}.'.format(
            num_images, filename_images, filename_labels))

    return images, labels


# FIXME: Remove this? But they'll find this anyway because it's in the Keras tutorial...
def example_model():
    '''
    Example model for digit image classification from the Keras package
    itself. This model achieves 99% global accuracy on the test dataset
    within roughly 15 epochs.

    The network consists of two 3x3 convolutions with 32 filters each,
    2x2 max pooling, dropout, a dense layer with 128 units, dropout and a
    10-way softmax output. It is built but not compiled.

    Returns:
        model: Keras model for MNIST image classification
    '''
    model = Sequential()

    # Convolutional feature extractor on 28x28 single-channel images
    model.add(Conv2D(32, (3, 3), kernel_initializer='glorot_normal',
                     input_shape=(28, 28, 1)))
    model.add(Activation('relu'))
    model.add(Conv2D(32, (3, 3), kernel_initializer='glorot_normal'))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    # Fully connected classifier with one output per digit class
    model.add(Flatten())
    model.add(Dense(128, kernel_initializer='glorot_normal'))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(10, kernel_initializer='glorot_uniform'))
    model.add(Activation('softmax'))

    return model


if __name__ == '__main__':
    # Load training data
    images, labels = load_data('train_images.bin', 'train_labels.bin')

    # Convert labels from integers to one-hot vectors
    labels = np_utils.to_categorical(labels, 10)

    # Set up model (deliberately much smaller than example_model above)
    model = Sequential()
    model.add(Conv2D(4, (2, 2), kernel_initializer='glorot_normal',
                     input_shape=(28, 28, 1)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(16, kernel_initializer='glorot_normal'))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(10, kernel_initializer='glorot_uniform'))
    model.add(Activation('softmax'))

    # Define loss function, optimizer algorithm and validation metrics
    model.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(),
        metrics=['categorical_accuracy'])

    # Print summary of the model
    model.summary()

    # Train model; a quarter of the loaded data is held out for validation
    model.fit(images, labels, batch_size=128, epochs=20,
              validation_split=0.25)

    # Save model to file
    # NOTE(review): newer Keras releases choose the save format from the
    # file extension ('.h5', '.hdf5', '.keras'); '.hd5' is not one of them.
    # Confirm this works with the installed Keras version before renaming,
    # since other scripts may expect this exact filename.
    model.save('model.hd5')