removed embeddings

This commit is contained in:
Abubakar Abid 2021-10-14 00:42:44 -05:00
parent eb7bc8f98e
commit 0f19b7ad30
3 changed files with 0 additions and 213 deletions

View File

@ -1,58 +0,0 @@
import numpy as np
# Small constant added to the cosine-similarity denominator so that a
# zero-norm embedding does not cause a division by zero.
SMALL_CONST = 1e-10
class PCA:
    """
    Minimal principal component analysis based on the eigendecomposition of
    the sample covariance matrix.

    Credit: https://www.python-engineer.com/courses/mlfromscratch/11_pca/

    n_components (int): Number of leading principal axes to keep.
    random_state (int): Seed handed to ``np.random.seed`` at the start of ``fit``.
    """

    def __init__(self, n_components, random_state):
        self.n_components = n_components
        # Principal axes, one per row; populated by fit().
        self.components = None
        # Per-feature mean of the fitted data; populated by fit().
        self.mean = None
        self.random_state = random_state

    def fit(self, X):
        """
        Learns the feature means and the principal axes of X.
        X (numpy.ndarray): 2D array with one row per sample and one column per feature.
        """
        # Kept for backward compatibility: this reseeds the global NumPy RNG
        # even though nothing below draws random numbers.
        np.random.seed(self.random_state)
        self.mean = np.mean(X, axis=0)
        centered = X - self.mean
        # np.cov collapses to a 0-d array when there is a single feature,
        # which np.linalg.eig rejects; force a square 2D matrix.
        cov = np.atleast_2d(np.cov(centered.T))
        eigenvalues, eigenvectors = np.linalg.eig(cov)
        # eig returns eigenvectors as columns; transpose so each row is one.
        eigenvectors = eigenvectors.T
        # Largest eigenvalue first.
        order = np.argsort(eigenvalues)[::-1]
        eigenvectors = eigenvectors[order]
        # Numerical noise can yield complex output; keep only the real part.
        self.components = np.real(eigenvectors[0:self.n_components])

    def transform(self, X):
        """
        Projects X onto the principal axes learned by fit().
        X (numpy.ndarray): 2D array with the same number of columns as the fitted data.
        Returns:
        (numpy.ndarray): Array with one row per sample and one column per kept component.
        """
        centered = X - self.mean
        return np.dot(centered, self.components.T)

    def fit_transform(self, X):
        """
        Fits the model on X and returns the projection of X.
        X (numpy.ndarray): 2D array with one row per sample and one column per feature.
        Returns:
        (numpy.ndarray): Projection of X onto the learned principal axes.
        """
        self.fit(X)
        return self.transform(X)
def calculate_similarity(embedding1, embedding2):
    """
    Computes the cosine similarity of two embedding vectors.
    embedding1 (List[Float]): First embedding, as a list or numpy array
    embedding2 (List[Float]): Second embedding, same length as the first
    Returns:
    (Float): Dot product divided by the product of the norms (plus SMALL_CONST)
    """
    vec_a = np.array(embedding1)
    vec_b = np.array(embedding2)
    # SMALL_CONST keeps the denominator non-zero for all-zero embeddings.
    norm_product = np.linalg.norm(vec_a) * np.linalg.norm(vec_b)
    return np.dot(vec_a, vec_b) / (norm_product + SMALL_CONST)
def fit_pca_to_embeddings(embeddings):
    """
    Fits a 2-component PCA model to a list of higher-dimensional embeddings and
    projects those embeddings into 2D.
    embeddings (List[List[Float]]): One embedding vector per entry
    Returns:
    (Tuple[PCA, List[Dict[str, Float]]]): The fitted model and one {'x', 'y'} point per embedding
    """
    model = PCA(n_components=2, random_state=0)
    projected = model.fit_transform(np.array(embeddings))
    points = []
    for row in projected.tolist():
        points.append({'x': row[0], 'y': row[1]})
    return model, points
def transform_with_pca(pca_model, embeddings):
    """
    Projects higher-dimensional embeddings into 2D using an already fitted PCA model.
    pca_model (PCA): Fitted model exposing a transform() method
    embeddings (List[List[Float]]): One embedding vector per entry
    Returns:
    (List[Dict[str, Float]]): One {'x', 'y'} point per embedding
    """
    projected = pca_model.transform(embeddings)
    points = []
    for row in projected.tolist():
        points.append({'x': row[0], 'y': row[1]})
    return points

View File

@ -69,16 +69,6 @@ class InputComponent(Component):
'''
pass
def embed(self, x):
    """
    Hook for producing an embedding of the *preprocessed* input to the interface,
    used to compute similar inputs. The base implementation does nothing.
    x (Any): Input to interface
    Returns:
    (List[Float]): An embedding vector as a list or numpy array of floats
    (None from this base implementation)
    """
    return None
class Textbox(InputComponent):
"""
Component creates a textbox for user to enter input. Provides a string as an argument to the wrapped function.
@ -196,28 +186,6 @@ class Textbox(InputComponent):
result.append((self.interpretation_separator, 0))
return result
def _embed_text(self, text):
    """
    Figures out a "reasonable" embedding for any particular text without loading any
    external machine learning models, which would be slow and require additional dependencies.
    The text is split on single spaces and compared, exactly as written (case-sensitive,
    punctuation kept), against a fixed vocabulary of common English words.
    text (str): Text to embed
    Returns:
    (numpy.ndarray): Boolean vector with one entry per vocabulary word, True where that
    word occurs among the tokens of the text
    """
    top_english_words = ['find', 'new', 'work', 'part', 'take', 'get', 'place', 'made', 'live', 'where', 'after', 'back', 'little', 'only', 'round', 'man', 'year', 'came', 'show', 'every', 'good', 'me', 'give', 'our', 'under', 'name', 'very', 'through', 'just', 'form', 'sentence', 'great', 'think', 'say', 'help', 'low', 'line', 'differ', 'turn', 'cause', 'much', 'mean', 'before', 'move', 'right', 'boy', 'old', 'too', 'same', 'tell', 'does', 'set', 'three', 'want', 'air', 'well', 'also', 'play', 'small', 'end', 'put', 'home', 'read', 'hand', 'port', 'large', 'spell', 'add', 'even', 'land', 'here', 'must', 'big', 'high', 'such', 'follow', 'act', 'why', 'ask', 'men', 'change', 'went', 'light', 'kind', 'off', 'need', 'house', 'picture', 'try', 'us', 'again', 'animal', 'point', 'mother', 'world', 'near', 'build', 'self', 'earth', 'father', 'head', 'stand', 'own', 'page', 'should', 'country', 'found', 'answer', 'school', 'grow', 'study', 'still', 'learn', 'plant', 'cover', 'food', 'sun', 'four', 'between', 'state', 'keep', 'eye', 'never', 'last', 'let', 'thought', 'city', 'tree', 'cross', 'farm', 'hard', 'start', 'might', 'story', 'saw', 'far', 'sea', 'draw', 'left', 'late', 'run', 'don\'t', 'while', 'press', 'close', 'night', 'real', 'life', 'few', 'north', 'open', 'seem', 'together', 'next', 'white', 'children', 'begin', 'got', 'walk', 'example', 'ease', 'paper', 'group', 'always', 'music', 'those', 'both', 'mark', 'often', 'letter', 'until', 'mile', 'river', 'car', 'feet', 'care', 'second', 'book', 'carry', 'took', 'science', 'eat', 'room', 'friend', 'began', 'idea', 'fish', 'mountain', 'stop', 'once', 'base', 'hear', 'horse', 'cut', 'sure', 'watch', 'color', 'face', 'wood', 'main', 'enough', 'plain', 'girl', 'usual', 'young', 'ready', 'above', 'ever', 'red', 'list', 'though', 'feel', 'talk', 'bird', 'soon', 'body', 'dog', 'family', 'direct', 'pose', 'leave', 'song', 'measure', 'door', 'product', 'black', 'short', 'numeral', 'class', 'wind', 'question', 'happen',
                         'complete', 'ship', 'area', 'half', 'rock', 'order', 'fire', 'south', 'problem', 'piece', 'told', 'knew', 'pass', 'since', 'top', 'whole', 'king', 'space', 'heard', 'best', 'hour', 'better', 'true', 'during', 'hundred', 'five', 'remember', 'step', 'early', 'hold', 'west', 'ground', 'interest', 'reach', 'fast', 'verb', 'sing', 'listen', 'six', 'table', 'travel', 'less', 'morning', 'ten', 'simple', 'several', 'vowel', 'toward', 'war', 'lay', 'against', 'pattern', 'slow', 'center', 'love', 'person', 'money', 'serve', 'appear', 'road', 'map', 'rain', 'rule', 'govern', 'pull', 'cold', 'notice', 'voice', 'unit', 'power', 'town', 'fine', 'certain', 'fly', 'fall', 'lead', 'cry', 'dark', 'machine', 'note', 'wait', 'plan', 'figure', 'star', 'box', 'noun', 'field', 'rest', 'correct', 'able', 'pound', 'done', 'beauty', 'drive', 'stood', 'contain', 'front', 'teach', 'week', 'final', 'gave', 'green', 'oh', 'quick', 'develop', 'ocean', 'warm', 'free', 'minute', 'strong', 'special', 'mind', 'behind', 'clear', 'tail', 'produce', 'fact', 'street', 'inch', 'multiply', 'nothing', 'course', 'stay', 'wheel', 'full', 'force', 'blue', 'object', 'decide', 'surface', 'deep', 'moon', 'island', 'foot', 'system', 'busy', 'test', 'record', 'boat', 'common', 'gold', 'possible', 'plane', 'stead', 'dry', 'wonder', 'laugh', 'thousand', 'ago', 'ran', 'check', 'game', 'shape', 'equate', 'hot', 'miss', 'brought', 'heat', 'snow', 'tire', 'bring', 'yes', 'distant', 'fill', 'east', 'paint', 'language', 'among', 'grand', 'ball', 'yet', 'wave', 'drop', 'heart', 'am', 'present', 'heavy', 'dance', 'engine', 'position', 'arm', 'wide', 'sail', 'material', 'size', 'vary', 'settle', 'speak', 'weight', 'general', 'ice', 'matter', 'circle', 'pair', 'include', 'divide', 'syllable', 'felt', 'perhaps', 'pick', 'sudden', 'count', 'square', 'reason', 'length', 'represent', 'art', 'subject', 'region', 'energy', 'hunt', 'probable', 'bed', 'brother', 'egg', 'ride', 'cell', 'believe', 'fraction', 'forest', 'sit', 'race',
                         'window', 'store', 'summer', 'train', 'sleep', 'prove', 'lone', 'leg',
                         'exercise', 'wall', 'catch', 'mount', 'wish', 'sky', 'board', 'joy', 'winter', 'sat', 'written', 'wild', 'instrument', 'kept', 'glass', 'grass', 'cow', 'job', 'edge', 'sign', 'visit', 'past', 'soft', 'fun', 'bright', 'gas', 'weather', 'month', 'million', 'bear', 'finish', 'happy', 'hope', 'flower', 'clothe', 'strange', 'gone', 'jump', 'baby', 'eight', 'village', 'meet', 'root', 'buy', 'raise', 'solve', 'metal', 'whether', 'push', 'seven', 'paragraph', 'third', 'shall', 'held', 'hair', 'describe', 'cook', 'floor', 'either', 'result', 'burn', 'hill', 'safe', 'cat', 'century', 'consider', 'type', 'law', 'bit', 'coast', 'copy', 'phrase', 'silent', 'tall', 'sand', 'soil', 'roll', 'temperature', 'finger', 'industry', 'value', 'fight', 'lie', 'beat', 'excite', 'natural', 'view', 'sense', 'ear', 'else', 'quite', 'broke', 'case', 'middle', 'kill', 'son', 'lake', 'moment', 'scale', 'loud', 'spring', 'observe', 'child', 'straight', 'consonant', 'nation', 'dictionary', 'milk', 'speed', 'method', 'organ', 'pay', 'age', 'section', 'dress', 'cloud', 'surprise', 'quiet', 'stone', 'tiny', 'climb', 'cool', 'design', 'poor', 'lot', 'experiment', 'bottom', 'key', 'iron', 'single', 'stick', 'flat', 'twenty', 'skin', 'smile', 'crease', 'hole', 'trade', 'melody', 'trip', 'office', 'receive', 'row', 'mouth', 'exact', 'symbol', 'die', 'least', 'trouble', 'shout', 'except', 'wrote', 'seed', 'tone', 'join', 'suggest', 'clean', 'break', 'lady', 'yard', 'rise', 'bad', 'blow', 'oil', 'blood', 'touch', 'grew', 'cent', 'mix', 'team', 'wire', 'cost', 'lost', 'brown', 'wear', 'garden', 'equal', 'sent', 'choose', 'fell', 'fit', 'flow', 'fair', 'bank', 'collect', 'save', 'control', 'decimal', 'gentle', 'woman', 'captain', 'practice', 'separate', 'difficult', 'doctor', 'please', 'protect', 'noon', 'whose', 'locate', 'ring', 'character', 'insect', 'caught', 'period', 'indicate', 'radio', 'spoke', 'atom', 'human', 'history', 'effect', 'electric', 'expect', 'crop', 'modern', 'element', 'hit', 'student',
                         'corner', 'party', 'supply', 'bone', 'rail', 'imagine', 'provide', 'agree', 'thus', 'capital', 'won\'t', 'chair', 'danger', 'fruit', 'rich', 'thick', 'soldier', 'process', 'operate', 'guess', 'necessary', 'sharp', 'wing', 'create', 'neighbor', 'wash', 'bat', 'rather', 'crowd', 'corn', 'compare', 'poem', 'string', 'bell', 'depend', 'meat', 'rub', 'tube', 'famous', 'dollar', 'stream', 'fear', 'sight', 'thin', 'triangle', 'planet', 'hurry', 'chief', 'colony', 'clock', 'mine', 'tie', 'enter', 'major', 'fresh', 'search', 'send', 'yellow', 'gun', 'allow', 'print', 'dead', 'spot', 'desert', 'suit', 'current', 'lift', 'rose', 'continue', 'block', 'chart', 'hat', 'sell', 'success', 'company', 'subtract', 'event', 'particular', 'deal', 'swim', 'term', 'opposite', 'wife', 'shoe', 'shoulder', 'spread', 'arrange', 'camp', 'invent', 'cotton', 'born', 'determine', 'quart', 'nine', 'truck', 'noise', 'level', 'chance', 'gather', 'shop', 'stretch', 'throw', 'shine', 'property', 'column', 'molecule', 'select', 'wrong', 'gray', 'repeat', 'require', 'broad', 'prepare', 'salt', 'nose', 'plural', 'anger', 'claim', 'continent', 'oxygen', 'sugar', 'death', 'pretty', 'skill', 'women', 'season', 'solution', 'magnet', 'silver', 'thank', 'branch', 'match', 'suffix', 'especially', 'fig', 'afraid', 'huge', 'sister', 'steel', 'discuss', 'forward', 'similar', 'guide', 'experience', 'score', 'apple', 'bought', 'led', 'pitch', 'coat', 'mass', 'card', 'band', 'rope', 'slip', 'win', 'dream', 'evening', 'condition', 'feed', 'tool', 'total', 'basic', 'smell', 'valley', 'nor', 'double', 'seat', 'arrive', 'master', 'track', 'parent', 'shore', 'division', 'sheet', 'substance', 'favor', 'connect', 'post', 'spend', 'chord', 'fat', 'glad', 'original', 'share', 'station', 'dad', 'bread', 'charge', 'proper', 'bar', 'offer', 'segment', 'slave', 'duck', 'instant', 'market', 'degree', 'populate', 'chick', 'dear', 'enemy', 'reply', 'drink', 'occur', 'support', 'speech', 'nature', 'range', 'steam', 'motion', 'path',
                         'liquid', 'log', 'meant', 'quotient', 'teeth', 'shell', 'neck']
    # Build the token set once so each vocabulary lookup is O(1) instead of a
    # linear scan over every token in the text.
    words = set(text.split(' '))
    return np.array([w in words for w in top_english_words])
def embed(self, x):
    """
    Embeds the textbox input according to the component type: a word-presence
    vector for "str", a single-element float list for "number".
    x (Union[str, float]): Preprocessed input to interface
    Returns:
    (List[Float]): An embedding vector as a list or numpy array
    """
    kind = self.type
    if kind == "str":
        return self._embed_text(x)
    if kind == "number":
        return [float(x)]
    raise ValueError("Unknown type: " + str(kind) +
                     ". Please choose from: 'str', 'number'.")
class Number(InputComponent):
"""
@ -298,9 +266,6 @@ class Number(InputComponent):
interpretation.insert(int(len(interpretation) / 2), [x, None])
return interpretation
def embed(self, x):
    """
    Embeds a numeric input as a one-dimensional vector.
    x (float): Preprocessed input to interface
    Returns:
    (List[Float]): Single-element list holding x converted to float
    """
    value = float(x)
    return [value]
class Slider(InputComponent):
"""
@ -371,9 +336,6 @@ class Slider(InputComponent):
"""
return scores
def embed(self, x):
    """
    Embeds the slider value as a one-dimensional vector.
    x (float): Preprocessed input to interface
    Returns:
    (List[Float]): Single-element list holding x converted to float
    """
    as_float = float(x)
    return [as_float]
class Checkbox(InputComponent):
"""
@ -431,9 +393,6 @@ class Checkbox(InputComponent):
else:
return None, scores[0]
def embed(self, x):
    """
    Embeds the checkbox state as a one-dimensional vector.
    x (bool): Preprocessed input to interface
    Returns:
    (List[Float]): Single-element list holding x converted to float
    """
    state = float(x)
    return [state]
class CheckboxGroup(InputComponent):
"""
@ -504,15 +463,6 @@ class CheckboxGroup(InputComponent):
final_scores.append(score_set)
return final_scores
def embed(self, x):
    """
    Embeds the selection as a multi-hot vector with one entry per choice.
    x (Union[List[str], List[int]]): Selected choice values (type "value") or
    selected choice indices (type "index")
    Returns:
    (List[Float]): 1.0 for every selected choice, 0.0 otherwise
    """
    if self.type == "value":
        keys = self.choices
    elif self.type == "index":
        keys = range(len(self.choices))
    else:
        raise ValueError("Unknown type: " + str(self.type) +
                         ". Please choose from: 'value', 'index'.")
    return [float(key in x) for key in keys]
def save_flagged(self, dir, label, data, encryption_key):
"""
Returns: (List[str]])
@ -580,15 +530,6 @@ class Radio(InputComponent):
scores.insert(self.choices.index(x), None)
return scores
def embed(self, x):
    """
    Embeds the selected radio option as a one-hot vector with one entry per choice.
    x (Union[str, int]): Selected choice value (type "value") or selected
    choice index (type "index")
    Returns:
    (List[Float]): 1.0 at the selected choice, 0.0 elsewhere
    """
    if self.type == "value":
        keys = self.choices
    elif self.type == "index":
        keys = range(len(self.choices))
    else:
        raise ValueError("Unknown type: " + str(self.type) +
                         ". Please choose from: 'value', 'index'.")
    return [float(key == x) for key in keys]
class Dropdown(InputComponent):
"""
@ -647,16 +588,6 @@ class Dropdown(InputComponent):
scores.insert(self.choices.index(x), None)
return scores
def embed(self, x):
    """
    Embeds the selected dropdown option as a one-hot vector with one entry per choice.
    x (Union[str, int]): Selected choice value (type "value") or selected
    choice index (type "index")
    Returns:
    (List[Float]): 1.0 at the selected choice, 0.0 elsewhere
    """
    if self.type == "value":
        candidates = self.choices
    elif self.type == "index":
        candidates = range(len(self.choices))
    else:
        raise ValueError("Unknown type: " + str(self.type) +
                         ". Please choose from: 'value', 'index'.")
    one_hot = []
    for candidate in candidates:
        one_hot.append(float(candidate == x))
    return one_hot
class Image(InputComponent):
"""
Component creates an image upload box with editing capabilities.
@ -822,20 +753,6 @@ class Image(InputComponent):
output_scores = (output_scores - min_val) / (max_val - min_val)
return output_scores.tolist()
def embed(self, x):
    """
    Embeds an image as its flattened pixel values after it has been resized and
    cropped to the component's shape ((100, 100) when no shape is set).
    x (Any): Preprocessed input to interface; a PIL image for type "pil", an array
    for type "numpy", or something PIL.Image.open accepts for type "file"
    Returns:
    (numpy.ndarray): Flattened array of the resized image
    """
    target = self.shape if self.shape is not None else (100, 100)
    kind = self.type
    if kind == "pil":
        image = x
    elif kind == "numpy":
        image = PIL.Image.fromarray(x)
    elif kind == "file":
        image = PIL.Image.open(x)
    else:
        raise ValueError("Unknown type: " + str(kind) +
                         ". Please choose from: 'numpy', 'pil', 'file'.")
    resized = processing_utils.resize_and_crop(image, (target[0], target[1]))
    return np.asarray(resized).flatten()
def save_flagged(self, dir, label, data, encryption_key):
"""
Returns: (str) path to image file
@ -1109,9 +1026,6 @@ class File(InputComponent):
else:
return [process_single_file(f) for f in x]
def embed(self, x):
    """
    Embeddings are not available for file inputs.
    x (Any): Preprocessed input to interface (unused)
    Raises:
    NotImplementedError: always
    """
    message = "File doesn't currently support embeddings"
    raise NotImplementedError(message)
def save_flagged(self, dir, label, data, encryption_key):
"""
Returns: (str) path to file
@ -1191,10 +1105,6 @@ class Dataframe(InputComponent):
raise ValueError("Unknown type: " + str(self.type) +
". Please choose from: 'pandas', 'numpy', 'array'.")
def embed(self, x):
    """
    Embeddings are not available for dataframe inputs.
    x (Any): Preprocessed input to interface (unused)
    Raises:
    NotImplementedError: always
    """
    message = "DataFrame doesn't currently support embeddings"
    raise NotImplementedError(message)
def save_flagged(self, dir, label, data, encryption_key):
"""
Returns: (List[List[Union[str, float]]]) 2D array
@ -1250,10 +1160,6 @@ class Timeseries(InputComponent):
dataframe = dataframe.loc[dataframe[self.x or 0] <= x["range"][1]]
return dataframe
def embed(self, x):
    """
    Embeddings are not available for timeseries inputs.
    x (Any): Preprocessed input to interface (unused)
    Raises:
    NotImplementedError: always
    """
    # The message previously named "DataFrame", copied from the Dataframe
    # component; it now names the component that actually raised.
    raise NotImplementedError(
        "Timeseries doesn't currently support embeddings")
def save_flagged(self, dir, label, data, encryption_key):
"""
Returns: (List[List[Union[str, float]]]) 2D array

View File

@ -20,7 +20,6 @@ import requests
import sys
import csv
import logging
from gradio.embeddings import calculate_similarity, fit_pca_to_embeddings, transform_with_pca
from gradio.tunneling import create_tunnel
from gradio import encryptor
from gradio import queue
@ -209,66 +208,6 @@ def log_feature_analytics(feature):
pass # do not push analytics if no network
@app.route("/api/score_similarity/", methods=["POST"])
@login_check
def score_similarity():
    """
    Scores how similar the posted input is to each of the interface's examples.
    Reads the raw input from the "data" field of the JSON request body.
    Returns: JSON response whose "data" field lists one similarity score per example
    """
    payload = request.json["data"]
    query_inputs = [component.preprocess(payload[position])
                    for position, component in enumerate(app.interface.input_components)]
    query_embedding = app.interface.embed(query_inputs)

    similarities = []
    for example_row in app.interface.examples:
        # Examples are first converted to raw-input form, then preprocessed
        # the same way as the posted input.
        example_inputs = []
        for component, raw_value in zip(app.interface.input_components, example_row):
            example_inputs.append(
                component.preprocess(component.preprocess_example(raw_value)))
        example_embedding = app.interface.embed(example_inputs)
        similarities.append(
            calculate_similarity(query_embedding, example_embedding))
    log_feature_analytics('score_similarity')
    return jsonify({"data": similarities})
@app.route("/api/view_embeddings/", methods=["POST"])
@login_check
def view_embeddings():
    """
    Fits a 2D PCA projection over the embeddings of all examples (plus the posted
    input, when the JSON request body has a "data" field) and stores the fitted
    model on app.pca_model.
    Returns: JSON response with "sample_embedding_2d" and "example_embeddings_2d" point lists
    """
    sample_embedding = []
    if "data" in request.json:
        payload = request.json["data"]
        sample_inputs = [component.preprocess(payload[position])
                         for position, component in enumerate(app.interface.input_components)]
        sample_embedding.append(app.interface.embed(sample_inputs))

    example_embeddings = []
    for example_row in app.interface.examples:
        example_inputs = []
        for component, raw_value in zip(app.interface.input_components, example_row):
            example_inputs.append(
                component.preprocess(component.preprocess_example(raw_value)))
        example_embeddings.append(app.interface.embed(example_inputs))

    pca_model, embeddings_2d = fit_pca_to_embeddings(
        sample_embedding + example_embeddings)
    # The sample (if any) was placed first, so split the projection at its length.
    split_at = len(sample_embedding)
    sample_embedding_2d = embeddings_2d[:split_at]
    example_embeddings_2d = embeddings_2d[split_at:]
    app.pca_model = pca_model
    log_feature_analytics('view_embeddings')
    return jsonify({"sample_embedding_2d": sample_embedding_2d, "example_embeddings_2d": example_embeddings_2d})
@app.route("/api/update_embeddings/", methods=["POST"])
@login_check
def update_embeddings():
    """
    Projects the posted input into 2D with the PCA model stored on app.pca_model.
    When the JSON request body has no "data" field, an empty list is returned.
    Returns: JSON response with a "sample_embedding_2d" point list
    """
    projected = []
    if "data" in request.json:
        payload = request.json["data"]
        sample_inputs = [component.preprocess(payload[position])
                         for position, component in enumerate(app.interface.input_components)]
        sample_embedding = [app.interface.embed(sample_inputs)]
        projected = transform_with_pca(app.pca_model, sample_embedding)
    return jsonify({"sample_embedding_2d": projected})
def flag_data(input_data, output_data, flag_option=None, flag_index=None, username=None):
flag_path = os.path.join(app.cwd, app.interface.flagging_dir)
log_fp = "{}/log.csv".format(flag_path)