diff --git a/build/lib/gradio/inputs.py b/build/lib/gradio/inputs.py
index c003edadbc..0629684f5b 100644
--- a/build/lib/gradio/inputs.py
+++ b/build/lib/gradio/inputs.py
@@ -93,7 +93,7 @@ class Sketchpad(AbstractInput):
         """
         Default preprocessing method for the SketchPad is to convert the sketch to black and white and resize 28x28
         """
-        im = preprocessing_utils.encoding_to_image(inp)
+        im = preprocessing_utils.decode_base64_to_image(inp)
         im = im.convert('L')
         if self.invert_colors:
             im = ImageOps.invert(im)
@@ -111,7 +111,7 @@ class Sketchpad(AbstractInput):
         Default rebuild method to decode a base64 image
         """
         inp = msg['data']['input']
-        im = preprocessing_utils.encoding_to_image(inp)
+        im = preprocessing_utils.decode_base64_to_image(inp)
         timestamp = datetime.datetime.now()
         filename = f'input_{timestamp.strftime("%Y-%m-%d-%H-%M-%S")}.png'
         im.save(f'{dir}/{filename}', 'PNG')
@@ -135,7 +135,7 @@ class Webcam(AbstractInput):
         """
         Default preprocessing method for is to convert the picture to black and white and resize to be 48x48
         """
-        im = preprocessing_utils.encoding_to_image(inp)
+        im = preprocessing_utils.decode_base64_to_image(inp)
         im = im.convert('RGB')
         im = preprocessing_utils.resize_and_crop(im, (self.image_width, self.image_height))
         array = np.array(im).flatten().reshape(1, self.image_width, self.image_height, self.num_channels)
@@ -146,7 +146,7 @@ class Webcam(AbstractInput):
         Default rebuild method to decode a base64 image
         """
         inp = msg['data']['input']
-        im = preprocessing_utils.encoding_to_image(inp)
+        im = preprocessing_utils.decode_base64_to_image(inp)
         timestamp = datetime.datetime.now()
         filename = f'input_{timestamp.strftime("%Y-%m-%d-%H-%M-%S")}.png'
         im.save(f'{dir}/{filename}', 'PNG')
@@ -203,7 +203,7 @@ class ImageUpload(AbstractInput):
         """
         Default preprocessing method for is to convert the picture to black and white and resize to be 48x48
         """
-        im = preprocessing_utils.encoding_to_image(inp)
+        im = preprocessing_utils.decode_base64_to_image(inp)
         with warnings.catch_warnings():
             warnings.simplefilter("ignore")
             im = im.convert(self.image_mode)
@@ -222,7 +222,7 @@ class ImageUpload(AbstractInput):
         Default rebuild method to decode a base64 image
         """
         inp = msg['data']['input']
-        im = preprocessing_utils.encoding_to_image(inp)
+        im = preprocessing_utils.decode_base64_to_image(inp)
         timestamp = datetime.datetime.now()
         filename = f'input_{timestamp.strftime("%Y-%m-%d-%H-%M-%S")}.png'
         im.save(f'{dir}/{filename}', 'PNG')
diff --git a/build/lib/gradio/outputs.py b/build/lib/gradio/outputs.py
index fa0bd8f003..33e33ed053 100644
--- a/build/lib/gradio/outputs.py
+++ b/build/lib/gradio/outputs.py
@@ -156,7 +156,7 @@ class Image(AbstractOutput):
         Default rebuild method to decode a base64 image
         """
         out = msg['data']['output']
-        im = preprocessing_utils.encoding_to_image(out)
+        im = preprocessing_utils.decode_base64_to_image(out)
         timestamp = datetime.datetime.now()
         filename = f'output_{timestamp.strftime("%Y-%m-%d-%H-%M-%S")}.png'
         im.save(f'{dir}/{filename}', 'PNG')
diff --git a/gradio/inputs.py b/gradio/inputs.py
index 6e3779326d..16a3b54234 100644
--- a/gradio/inputs.py
+++ b/gradio/inputs.py
@@ -12,6 +12,7 @@ import time
 import warnings
 import json
+
 
 # Where to find the static resources associated with each template.
 BASE_INPUT_INTERFACE_TEMPLATE_PATH = 'templates/input/{}.html'
 BASE_INPUT_INTERFACE_JS_PATH = 'static/js/interfaces/input/{}.js'
@@ -93,12 +94,11 @@ class Sketchpad(AbstractInput):
         """
         Default preprocessing method for the SketchPad is to convert the sketch to black and white and resize 28x28
         """
-        im = preprocessing_utils.encoding_to_image(inp)
+        im = preprocessing_utils.decode_base64_to_image(inp)
         im = im.convert('L')
         if self.invert_colors:
             im = ImageOps.invert(im)
         im = im.resize((self.image_width, self.image_height))
-        # im = preprocessing_utils.resize_and_crop(im, (self.image_width, self.image_height))
         if self.flatten:
             array = np.array(im).flatten().reshape(1, self.image_width * self.image_height)
         else:
@@ -113,7 +113,7 @@ class Sketchpad(AbstractInput):
         Default rebuild method to decode a base64 image
         """
         inp = msg['data']['input']
-        im = preprocessing_utils.encoding_to_image(inp)
+        im = preprocessing_utils.decode_base64_to_image(inp)
         timestamp = time.time()*1000
         filename = f'input_{timestamp}.png'
         im.save(f'{dir}/{filename}', 'PNG')
@@ -137,7 +137,7 @@ class Webcam(AbstractInput):
         """
         Default preprocessing method for is to convert the picture to black and white and resize to be 48x48
         """
-        im = preprocessing_utils.encoding_to_image(inp)
+        im = preprocessing_utils.decode_base64_to_image(inp)
         im = im.convert('RGB')
         im = preprocessing_utils.resize_and_crop(im, (self.image_width, self.image_height))
         array = np.array(im).flatten().reshape(1, self.image_width, self.image_height, self.num_channels)
@@ -148,7 +148,7 @@ class Webcam(AbstractInput):
         Default rebuild method to decode a base64 image
         """
         inp = msg['data']['input']
-        im = preprocessing_utils.encoding_to_image(inp)
+        im = preprocessing_utils.decode_base64_to_image(inp)
         timestamp = time.time()*1000
         filename = f'input_{timestamp}.png'
         im.save(f'{dir}/{filename}', 'PNG')
@@ -205,7 +205,7 @@ class ImageUpload(AbstractInput):
         """
         Default preprocessing method for is to convert the picture to black and white and resize to be 48x48
         """
-        im = preprocessing_utils.encoding_to_image(inp)
+        im = preprocessing_utils.decode_base64_to_image(inp)
         with warnings.catch_warnings():
             warnings.simplefilter("ignore")
             im = im.convert(self.image_mode)
@@ -224,7 +224,7 @@ class ImageUpload(AbstractInput):
         Default rebuild method to decode a base64 image
         """
         inp = msg['data']['input']
-        im = preprocessing_utils.encoding_to_image(inp)
+        im = preprocessing_utils.decode_base64_to_image(inp)
         timestamp = time.time()*1000
         filename = f'input_{timestamp}.png'
         im.save(f'{dir}/{filename}', 'PNG')
@@ -266,9 +266,11 @@ class Microphone(AbstractInput):
 
     def preprocess(self, inp):
         """
-        By default, no pre-processing is applied to a microphone input file (TODO:aliabid94 fix this)
+        Default preprocessing method for the Microphone is to convert the audio into an array of MFCC features
        """
-        return inp
+        file_obj = preprocessing_utils.decode_base64_to_wav_file(inp)
+        mfcc_array = preprocessing_utils.generate_mfcc_features_from_audio_file(file_obj.name)
+        return mfcc_array
 
     def rebuild_flagged(self, dir, msg):
         """
diff --git a/gradio/networking.py b/gradio/networking.py
index cdd530b498..a935d64866 100644
--- a/gradio/networking.py
+++ b/gradio/networking.py
@@ -198,7 +198,7 @@ def serve_files_in_background(interface, port, directory_to_serve=None):
                 self._set_headers()
                 data_string = self.rfile.read(int(self.headers["Content-Length"]))
                 msg = json.loads(data_string)
-                img_orig = preprocessing_utils.encoding_to_image(msg["data"])
+                img_orig = preprocessing_utils.decode_base64_to_image(msg["data"])
                 img_orig = img_orig.convert('RGB')
                 img_orig = img_orig.resize((224, 224))
 
@@ -230,7 +230,7 @@ def serve_files_in_background(interface, port, directory_to_serve=None):
                 self._set_headers()
                 data_string = self.rfile.read(int(self.headers["Content-Length"]))
                 msg = json.loads(data_string)
-                img_orig = preprocessing_utils.encoding_to_image(msg["data"])
+                img_orig = preprocessing_utils.decode_base64_to_image(msg["data"])
                 img_orig = img_orig.convert('RGB')
                 img_orig = img_orig.resize((224, 224))
                 enhancer = ImageEnhance.Brightness(img_orig)
diff --git a/gradio/outputs.py b/gradio/outputs.py
index 95a6d2afd8..b52bd33acf 100644
--- a/gradio/outputs.py
+++ b/gradio/outputs.py
@@ -161,7 +161,7 @@ class Image(AbstractOutput):
         Default rebuild method to decode a base64 image
         """
         out = msg['data']['output']
-        im = preprocessing_utils.encoding_to_image(out)
+        im = preprocessing_utils.decode_base64_to_image(out)
         timestamp = datetime.datetime.now()
         filename = f'output_{timestamp.strftime("%Y-%m-%d-%H-%M-%S")}.png'
         im.save(f'{dir}/{filename}', 'PNG')
diff --git a/gradio/preprocessing_utils.py b/gradio/preprocessing_utils.py
index 3fc2f4e1b6..bb3025b9cc 100644
--- a/gradio/preprocessing_utils.py
+++ b/gradio/preprocessing_utils.py
@@ -1,13 +1,21 @@
 from PIL import Image
 from io import BytesIO
 import base64
+import tempfile
+import scipy.io.wavfile
+from scipy.fftpack import dct
+import numpy as np
 
 
-def encoding_to_image(encoding):
+#########################
+# IMAGE PRE-PROCESSING
+#########################
+def decode_base64_to_image(encoding):
     content = encoding.split(';')[1]
     image_encoded = content.split(',')[1]
     return Image.open(BytesIO(base64.b64decode(image_encoded)))
 
+
 def resize_and_crop(img, size, crop_type='top'):
     """
     Resize and crop an image to fit the specified size.
@@ -58,3 +66,89 @@ def resize_and_crop(img, size, crop_type='top'):
             Image.ANTIALIAS)
     # If the scale is the same, we do not need to crop
     return img
+
+
+##################
+# AUDIO FILES
+##################
+
+def decode_base64_to_wav_file(encoding):
+    inp = encoding.split(';')[1].split(',')[1]
+    wav_obj = base64.b64decode(inp)
+    file_obj = tempfile.NamedTemporaryFile()
+    file_obj.close()
+    with open(file_obj.name, 'wb') as f:
+        f.write(wav_obj)
+    return file_obj
+
+
+def generate_mfcc_features_from_audio_file(wav_filename,
+                                           pre_emphasis=0.95,
+                                           frame_size=0.025,
+                                           frame_stride=0.01,
+                                           NFFT=512,
+                                           nfilt=40,
+                                           num_ceps=12,
+                                           cep_lifter=22):
+    """
+    Loads and preprocesses a .wav audio file into MFCC coefficients, which are typical model inputs.
+    Adapted from: https://haythamfayek.com/2016/04/21/speech-processing-for-machine-learning.html
+    :param wav_filename: string name of audio file to process.
+    :param pre_emphasis: a float factor, typically 0.95 or 0.97, which amplifies high frequencies.
+    :param frame_size: a float that is the length, in seconds, of the time frame over which to take the FFT.
+    :param frame_stride: a float that is the offset, in seconds, between consecutive time frames.
+    :param NFFT: the number of points in the short-time FFT for each time frame.
+    :param nfilt: the number of filters on the Mel scale used to extract frequency bands.
+    :param num_ceps: the number of cepstral coefficients to retain.
+    :param cep_lifter: the int factor by which to de-emphasize the higher-frequency coefficients.
+    :return: a numpy array of MFCC coefficients.
+    """
+    sample_rate, signal = scipy.io.wavfile.read(wav_filename)
+    emphasized_signal = np.append(signal[0], signal[1:] - pre_emphasis * signal[:-1])
+
+    frame_length, frame_step = frame_size * sample_rate, frame_stride * sample_rate  # Convert from seconds to samples
+    signal_length = len(emphasized_signal)
+    frame_length = int(round(frame_length))
+    frame_step = int(round(frame_step))
+    num_frames = int(np.ceil(float(np.abs(signal_length - frame_length)) / frame_step))  # Make sure that we have at least 1 frame
+
+    pad_signal_length = num_frames * frame_step + frame_length
+    z = np.zeros((pad_signal_length - signal_length))
+    pad_signal = np.append(emphasized_signal, z)  # Pad signal so that all frames have an equal number of samples, without truncating any samples from the original signal
+
+    indices = np.tile(np.arange(0, frame_length), (num_frames, 1)) + np.tile(np.arange(0, num_frames * frame_step, frame_step), (frame_length, 1)).T
+    frames = pad_signal[indices.astype(np.int32, copy=False)]
+
+    frames *= np.hamming(frame_length)
+    mag_frames = np.absolute(np.fft.rfft(frames, NFFT))  # Magnitude of the FFT
+    pow_frames = ((1.0 / NFFT) * ((mag_frames) ** 2))  # Power spectrum
+
+    low_freq_mel = 0
+    high_freq_mel = (2595 * np.log10(1 + (sample_rate / 2) / 700))  # Convert Hz to Mel
+    mel_points = np.linspace(low_freq_mel, high_freq_mel, nfilt + 2)  # Equally spaced in Mel scale
+    hz_points = (700 * (10**(mel_points / 2595) - 1))  # Convert Mel to Hz
+    bin = np.floor((NFFT + 1) * hz_points / sample_rate)
+
+    fbank = np.zeros((nfilt, int(np.floor(NFFT / 2 + 1))))
+    for m in range(1, nfilt + 1):
+        f_m_minus = int(bin[m - 1])  # left
+        f_m = int(bin[m])  # center
+        f_m_plus = int(bin[m + 1])  # right
+
+        for k in range(f_m_minus, f_m):
+            fbank[m - 1, k] = (k - bin[m - 1]) / (bin[m] - bin[m - 1])
+        for k in range(f_m, f_m_plus):
+            fbank[m - 1, k] = (bin[m + 1] - k) / (bin[m + 1] - bin[m])
+    filter_banks = np.dot(pow_frames, fbank.T)
+    filter_banks = np.where(filter_banks == 0, np.finfo(float).eps, filter_banks)  # Numerical stability
+    filter_banks = 20 * np.log10(filter_banks)  # dB
+
+    mfcc = dct(filter_banks, type=2, axis=1, norm='ortho')[:, 0: (num_ceps + 1)]  # Keep cepstral coefficients 0 through num_ceps (13 values by default).
+    (nframes, ncoeff) = mfcc.shape
+    n = np.arange(ncoeff)
+    lift = 1 + (cep_lifter / 2) * np.sin(np.pi * n / cep_lifter)
+    mfcc *= lift
+
+    filter_banks -= (np.mean(filter_banks, axis=0) + 1e-8)
+    mfcc -= (np.mean(mfcc, axis=0) + 1e-8)
+    return mfcc[np.newaxis, :, :]  # Create a batch dimension.
diff --git a/setup.py b/setup.py
index f1fe0fff0b..5a5d0e286d 100644
--- a/setup.py
+++ b/setup.py
@@ -20,5 +20,6 @@ setup(
         'requests',
         'psutil',
         'paramiko',
+        'scipy',
     ],
 )
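
Note for reviewers: the sketch below shows how the renamed image decoder and the new audio utilities fit together end to end. It is illustrative only, assuming this branch is installed and that a local file named sample.wav exists; the filename and variable names are placeholders, not part of the diff.

    import base64
    from gradio import preprocessing_utils

    # Simulate the data-URI payload that the browser-side Microphone interface sends.
    with open('sample.wav', 'rb') as f:
        audio_uri = 'data:audio/wav;base64,' + base64.b64encode(f.read()).decode('utf-8')

    # Microphone.preprocess now performs these two steps internally:
    wav_file = preprocessing_utils.decode_base64_to_wav_file(audio_uri)
    mfcc = preprocessing_utils.generate_mfcc_features_from_audio_file(wav_file.name)
    print(mfcc.shape)  # (1, num_frames, 13): a batch dimension, then num_ceps + 1 coefficients per frame

    # The image decoder is a pure rename, so existing call sites only change the name:
    # im = preprocessing_utils.decode_base64_to_image(image_data_uri)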