mirror of
https://github.com/gradio-app/gradio.git
synced 2025-03-07 11:46:51 +08:00
removed embeddings
This commit is contained in:
parent
eb7bc8f98e
commit
0f19b7ad30
@ -1,58 +0,0 @@
|
||||
import numpy as np
|
||||
|
||||
SMALL_CONST = 1e-10
|
||||
|
||||
class PCA:
    """
    Minimal principal component analysis built on NumPy.

    Credit: https://www.python-engineer.com/courses/mlfromscratch/11_pca/

    Attributes:
    n_components (int): number of principal directions kept by `fit`.
    components (numpy.ndarray | None): one principal direction per row, set by `fit`.
    mean (numpy.ndarray | None): per-feature mean of the training data, set by `fit`.
    random_state: value passed to `np.random.seed` at the start of `fit`.
    """

    def __init__(self, n_components, random_state):
        self.n_components = n_components
        self.components = None
        self.mean = None
        self.random_state = random_state

    def fit(self, X):
        """
        Learns the feature means and the leading principal directions of X.
        X: 2D array-like with one sample per row and one feature per column.
        """
        # Kept from the original implementation: seeds NumPy's global RNG.
        # The decomposition below does not itself draw random numbers.
        np.random.seed(self.random_state)
        X = np.asarray(X, dtype=float)
        self.mean = np.mean(X, axis=0)
        centered = X - self.mean
        # np.cov squeezes a single-feature result down to a 0-d array;
        # restore the 2D shape so the eigendecomposition still works.
        cov = np.atleast_2d(np.cov(centered, rowvar=False))
        # A covariance matrix is symmetric, so eigh is the appropriate solver:
        # it returns real eigenvalues/eigenvectors (no np.real cleanup needed).
        eigenvalues, eigenvectors = np.linalg.eigh(cov)
        # Largest eigenvalue first; eigenvectors are the columns of the result.
        order = np.argsort(eigenvalues)[::-1]
        self.components = eigenvectors[:, order].T[0:self.n_components]

    def transform(self, X):
        """
        Centers X with the fitted mean and projects it onto the fitted components.
        Returns: array with one row per sample and one column per component.
        """
        X = X - self.mean
        return np.dot(X, self.components.T)

    def fit_transform(self, X):
        """
        Fits the model on X, then returns the projection of X.
        """
        self.fit(X)
        return self.transform(X)
|
||||
|
||||
|
||||
def calculate_similarity(embedding1, embedding2):
    """
    Scores the similarity between two embeddings by taking the cosine similarity.
    embedding1, embedding2: equal-length sequences or arrays of numbers/booleans.
    Returns: dot(e1, e2) / (|e1| * |e2| + SMALL_CONST); the small constant
    keeps the division defined when either embedding has zero norm.
    """
    # Cast to float first: np.dot on two boolean arrays yields a boolean
    # (logical any-of-ands), which would clip the numerator to 0 or 1.
    e1 = np.asarray(embedding1, dtype=float)
    e2 = np.asarray(embedding2, dtype=float)
    denominator = np.linalg.norm(e1) * np.linalg.norm(e2) + SMALL_CONST
    return np.dot(e1, e2) / denominator
|
||||
|
||||
def fit_pca_to_embeddings(embeddings):
    """
    Fits a 2-component PCA model to a list of higher-dimensional embeddings.
    Returns: (pca_model, points) where points is a list of {'x': ..., 'y': ...}
    dicts, one per input embedding, holding its 2D projection.
    """
    model = PCA(n_components=2, random_state=0)
    matrix = np.array(embeddings)
    projected = model.fit_transform(matrix)
    points = []
    for row in projected.tolist():
        points.append({'x': row[0], 'y': row[1]})
    return model, points
|
||||
|
||||
def transform_with_pca(pca_model, embeddings):
    """
    Projects embeddings to 2D with an already-fitted model.
    pca_model: object exposing `transform(embeddings)` that returns an array.
    Returns: list of {'x': ..., 'y': ...} dicts, one per embedding.
    """
    projected = pca_model.transform(embeddings)
    points = []
    for row in projected.tolist():
        points.append({'x': row[0], 'y': row[1]})
    return points
|
@ -69,16 +69,6 @@ class InputComponent(Component):
|
||||
'''
|
||||
pass
|
||||
|
||||
def embed(self, x):
    """
    Default embedding hook for the *preprocessed* input to the interface; used to compute similar inputs.
    This base implementation does nothing and returns None.
    x (Any): Input to interface
    Returns:
    (List[Float]): An embedding vector as a list or numpy array of floats
    """
    return None
|
||||
|
||||
|
||||
class Textbox(InputComponent):
|
||||
"""
|
||||
Component creates a textbox for user to enter input. Provides a string as an argument to the wrapped function.
|
||||
@ -196,28 +186,6 @@ class Textbox(InputComponent):
|
||||
result.append((self.interpretation_separator, 0))
|
||||
return result
|
||||
|
||||
def _embed_text(self, text):
|
||||
"""
|
||||
Figures out a "reasonable" embedding for any particular text. Did it this way to avoid loading any
|
||||
external machine learning models, which would be slow and require additional dependencies.
|
||||
"""
|
||||
top_english_words = ['find', 'new', 'work', 'part', 'take', 'get', 'place', 'made', 'live', 'where', 'after', 'back', 'little', 'only', 'round', 'man', 'year', 'came', 'show', 'every', 'good', 'me', 'give', 'our', 'under', 'name', 'very', 'through', 'just', 'form', 'sentence', 'great', 'think', 'say', 'help', 'low', 'line', 'differ', 'turn', 'cause', 'much', 'mean', 'before', 'move', 'right', 'boy', 'old', 'too', 'same', 'tell', 'does', 'set', 'three', 'want', 'air', 'well', 'also', 'play', 'small', 'end', 'put', 'home', 'read', 'hand', 'port', 'large', 'spell', 'add', 'even', 'land', 'here', 'must', 'big', 'high', 'such', 'follow', 'act', 'why', 'ask', 'men', 'change', 'went', 'light', 'kind', 'off', 'need', 'house', 'picture', 'try', 'us', 'again', 'animal', 'point', 'mother', 'world', 'near', 'build', 'self', 'earth', 'father', 'head', 'stand', 'own', 'page', 'should', 'country', 'found', 'answer', 'school', 'grow', 'study', 'still', 'learn', 'plant', 'cover', 'food', 'sun', 'four', 'between', 'state', 'keep', 'eye', 'never', 'last', 'let', 'thought', 'city', 'tree', 'cross', 'farm', 'hard', 'start', 'might', 'story', 'saw', 'far', 'sea', 'draw', 'left', 'late', 'run', 'don\'t', 'while', 'press', 'close', 'night', 'real', 'life', 'few', 'north', 'open', 'seem', 'together', 'next', 'white', 'children', 'begin', 'got', 'walk', 'example', 'ease', 'paper', 'group', 'always', 'music', 'those', 'both', 'mark', 'often', 'letter', 'until', 'mile', 'river', 'car', 'feet', 'care', 'second', 'book', 'carry', 'took', 'science', 'eat', 'room', 'friend', 'began', 'idea', 'fish', 'mountain', 'stop', 'once', 'base', 'hear', 'horse', 'cut', 'sure', 'watch', 'color', 'face', 'wood', 'main', 'enough', 'plain', 'girl', 'usual', 'young', 'ready', 'above', 'ever', 'red', 'list', 'though', 'feel', 'talk', 'bird', 'soon', 'body', 'dog', 'family', 'direct', 'pose', 'leave', 'song', 'measure', 'door', 'product', 'black', 'short', 'numeral', 'class', 'wind', 'question', 'happen', 
'complete', 'ship', 'area', 'half', 'rock', 'order', 'fire', 'south', 'problem', 'piece', 'told', 'knew', 'pass', 'since', 'top', 'whole', 'king', 'space', 'heard', 'best', 'hour', 'better', 'true', 'during', 'hundred', 'five', 'remember', 'step', 'early', 'hold', 'west', 'ground', 'interest', 'reach', 'fast', 'verb', 'sing', 'listen', 'six', 'table', 'travel', 'less', 'morning', 'ten', 'simple', 'several', 'vowel', 'toward', 'war', 'lay', 'against', 'pattern', 'slow', 'center', 'love', 'person', 'money', 'serve', 'appear', 'road', 'map', 'rain', 'rule', 'govern', 'pull', 'cold', 'notice', 'voice', 'unit', 'power', 'town', 'fine', 'certain', 'fly', 'fall', 'lead', 'cry', 'dark', 'machine', 'note', 'wait', 'plan', 'figure', 'star', 'box', 'noun', 'field', 'rest', 'correct', 'able', 'pound', 'done', 'beauty', 'drive', 'stood', 'contain', 'front', 'teach', 'week', 'final', 'gave', 'green', 'oh', 'quick', 'develop', 'ocean', 'warm', 'free', 'minute', 'strong', 'special', 'mind', 'behind', 'clear', 'tail', 'produce', 'fact', 'street', 'inch', 'multiply', 'nothing', 'course', 'stay', 'wheel', 'full', 'force', 'blue', 'object', 'decide', 'surface', 'deep', 'moon', 'island', 'foot', 'system', 'busy', 'test', 'record', 'boat', 'common', 'gold', 'possible', 'plane', 'stead', 'dry', 'wonder', 'laugh', 'thousand', 'ago', 'ran', 'check', 'game', 'shape', 'equate', 'hot', 'miss', 'brought', 'heat', 'snow', 'tire', 'bring', 'yes', 'distant', 'fill', 'east', 'paint', 'language', 'among', 'grand', 'ball', 'yet', 'wave', 'drop', 'heart', 'am', 'present', 'heavy', 'dance', 'engine', 'position', 'arm', 'wide', 'sail', 'material', 'size', 'vary', 'settle', 'speak', 'weight', 'general', 'ice', 'matter', 'circle', 'pair', 'include', 'divide', 'syllable', 'felt', 'perhaps', 'pick', 'sudden', 'count', 'square', 'reason', 'length', 'represent', 'art', 'subject', 'region', 'energy', 'hunt', 'probable', 'bed', 'brother', 'egg', 'ride', 'cell', 'believe', 'fraction', 'forest', 'sit', 'race', 
'window', 'store', 'summer', 'train', 'sleep', 'prove', 'lone', 'leg',
|
||||
'exercise', 'wall', 'catch', 'mount', 'wish', 'sky', 'board', 'joy', 'winter', 'sat', 'written', 'wild', 'instrument', 'kept', 'glass', 'grass', 'cow', 'job', 'edge', 'sign', 'visit', 'past', 'soft', 'fun', 'bright', 'gas', 'weather', 'month', 'million', 'bear', 'finish', 'happy', 'hope', 'flower', 'clothe', 'strange', 'gone', 'jump', 'baby', 'eight', 'village', 'meet', 'root', 'buy', 'raise', 'solve', 'metal', 'whether', 'push', 'seven', 'paragraph', 'third', 'shall', 'held', 'hair', 'describe', 'cook', 'floor', 'either', 'result', 'burn', 'hill', 'safe', 'cat', 'century', 'consider', 'type', 'law', 'bit', 'coast', 'copy', 'phrase', 'silent', 'tall', 'sand', 'soil', 'roll', 'temperature', 'finger', 'industry', 'value', 'fight', 'lie', 'beat', 'excite', 'natural', 'view', 'sense', 'ear', 'else', 'quite', 'broke', 'case', 'middle', 'kill', 'son', 'lake', 'moment', 'scale', 'loud', 'spring', 'observe', 'child', 'straight', 'consonant', 'nation', 'dictionary', 'milk', 'speed', 'method', 'organ', 'pay', 'age', 'section', 'dress', 'cloud', 'surprise', 'quiet', 'stone', 'tiny', 'climb', 'cool', 'design', 'poor', 'lot', 'experiment', 'bottom', 'key', 'iron', 'single', 'stick', 'flat', 'twenty', 'skin', 'smile', 'crease', 'hole', 'trade', 'melody', 'trip', 'office', 'receive', 'row', 'mouth', 'exact', 'symbol', 'die', 'least', 'trouble', 'shout', 'except', 'wrote', 'seed', 'tone', 'join', 'suggest', 'clean', 'break', 'lady', 'yard', 'rise', 'bad', 'blow', 'oil', 'blood', 'touch', 'grew', 'cent', 'mix', 'team', 'wire', 'cost', 'lost', 'brown', 'wear', 'garden', 'equal', 'sent', 'choose', 'fell', 'fit', 'flow', 'fair', 'bank', 'collect', 'save', 'control', 'decimal', 'gentle', 'woman', 'captain', 'practice', 'separate', 'difficult', 'doctor', 'please', 'protect', 'noon', 'whose', 'locate', 'ring', 'character', 'insect', 'caught', 'period', 'indicate', 'radio', 'spoke', 'atom', 'human', 'history', 'effect', 'electric', 'expect', 'crop', 'modern', 'element', 'hit', 'student', 
'corner', 'party', 'supply', 'bone', 'rail', 'imagine', 'provide', 'agree', 'thus', 'capital', 'won\'t', 'chair', 'danger', 'fruit', 'rich', 'thick', 'soldier', 'process', 'operate', 'guess', 'necessary', 'sharp', 'wing', 'create', 'neighbor', 'wash', 'bat', 'rather', 'crowd', 'corn', 'compare', 'poem', 'string', 'bell', 'depend', 'meat', 'rub', 'tube', 'famous', 'dollar', 'stream', 'fear', 'sight', 'thin', 'triangle', 'planet', 'hurry', 'chief', 'colony', 'clock', 'mine', 'tie', 'enter', 'major', 'fresh', 'search', 'send', 'yellow', 'gun', 'allow', 'print', 'dead', 'spot', 'desert', 'suit', 'current', 'lift', 'rose', 'continue', 'block', 'chart', 'hat', 'sell', 'success', 'company', 'subtract', 'event', 'particular', 'deal', 'swim', 'term', 'opposite', 'wife', 'shoe', 'shoulder', 'spread', 'arrange', 'camp', 'invent', 'cotton', 'born', 'determine', 'quart', 'nine', 'truck', 'noise', 'level', 'chance', 'gather', 'shop', 'stretch', 'throw', 'shine', 'property', 'column', 'molecule', 'select', 'wrong', 'gray', 'repeat', 'require', 'broad', 'prepare', 'salt', 'nose', 'plural', 'anger', 'claim', 'continent', 'oxygen', 'sugar', 'death', 'pretty', 'skill', 'women', 'season', 'solution', 'magnet', 'silver', 'thank', 'branch', 'match', 'suffix', 'especially', 'fig', 'afraid', 'huge', 'sister', 'steel', 'discuss', 'forward', 'similar', 'guide', 'experience', 'score', 'apple', 'bought', 'led', 'pitch', 'coat', 'mass', 'card', 'band', 'rope', 'slip', 'win', 'dream', 'evening', 'condition', 'feed', 'tool', 'total', 'basic', 'smell', 'valley', 'nor', 'double', 'seat', 'arrive', 'master', 'track', 'parent', 'shore', 'division', 'sheet', 'substance', 'favor', 'connect', 'post', 'spend', 'chord', 'fat', 'glad', 'original', 'share', 'station', 'dad', 'bread', 'charge', 'proper', 'bar', 'offer', 'segment', 'slave', 'duck', 'instant', 'market', 'degree', 'populate', 'chick', 'dear', 'enemy', 'reply', 'drink', 'occur', 'support', 'speech', 'nature', 'range', 'steam', 'motion', 'path', 
'liquid', 'log', 'meant', 'quotient', 'teeth', 'shell', 'neck']
|
||||
words = text.split(' ')
|
||||
return np.array([w in words for w in top_english_words])
|
||||
|
||||
def embed(self, x):
    """
    Embeds the textbox value according to the component's `type`:
    "str" delegates to `_embed_text`, "number" yields a one-element list
    holding float(x). Any other type raises ValueError.
    """
    kind = self.type
    if kind == "str":
        return self._embed_text(x)
    if kind == "number":
        return [float(x)]
    raise ValueError("Unknown type: " + str(self.type) +
                     ". Please choose from: 'str', 'number'.")
|
||||
|
||||
|
||||
class Number(InputComponent):
|
||||
"""
|
||||
@ -298,9 +266,6 @@ class Number(InputComponent):
|
||||
interpretation.insert(int(len(interpretation) / 2), [x, None])
|
||||
return interpretation
|
||||
|
||||
def embed(self, x):
    """
    Embeds a numeric input as a one-element list containing float(x).
    """
    as_float = float(x)
    return [as_float]
|
||||
|
||||
|
||||
class Slider(InputComponent):
|
||||
"""
|
||||
@ -371,9 +336,6 @@ class Slider(InputComponent):
|
||||
"""
|
||||
return scores
|
||||
|
||||
def embed(self, x):
    """
    Embeds the slider value as a one-element list containing float(x).
    """
    position = float(x)
    return [position]
|
||||
|
||||
|
||||
class Checkbox(InputComponent):
|
||||
"""
|
||||
@ -431,9 +393,6 @@ class Checkbox(InputComponent):
|
||||
else:
|
||||
return None, scores[0]
|
||||
|
||||
def embed(self, x):
    """
    Embeds the checkbox state as a one-element list containing float(x)
    (1.0 for True, 0.0 for False).
    """
    state = float(x)
    return [state]
|
||||
|
||||
|
||||
class CheckboxGroup(InputComponent):
|
||||
"""
|
||||
@ -504,15 +463,6 @@ class CheckboxGroup(InputComponent):
|
||||
final_scores.append(score_set)
|
||||
return final_scores
|
||||
|
||||
def embed(self, x):
    """
    Embeds the selection as a multi-hot list with one float per entry of
    `self.choices`. With type "value" an entry is 1.0 when that choice is
    in x; with type "index" it is 1.0 when that position is in x.
    Any other type raises ValueError.
    """
    mode = self.type
    if mode == "value":
        candidates = self.choices
    elif mode == "index":
        candidates = range(len(self.choices))
    else:
        raise ValueError("Unknown type: " + str(self.type) +
                         ". Please choose from: 'value', 'index'.")
    return [float(candidate in x) for candidate in candidates]
|
||||
|
||||
def save_flagged(self, dir, label, data, encryption_key):
|
||||
"""
|
||||
Returns: (List[str]])
|
||||
@ -580,15 +530,6 @@ class Radio(InputComponent):
|
||||
scores.insert(self.choices.index(x), None)
|
||||
return scores
|
||||
|
||||
def embed(self, x):
    """
    Embeds the selected radio option as a one-hot list with one float per
    entry of `self.choices`. With type "value" x is compared to each choice;
    with type "index" x is compared to each position.
    Any other type raises ValueError.
    """
    mode = self.type
    if mode == "value":
        candidates = self.choices
    elif mode == "index":
        candidates = range(len(self.choices))
    else:
        raise ValueError("Unknown type: " + str(self.type) +
                         ". Please choose from: 'value', 'index'.")
    return [float(candidate == x) for candidate in candidates]
|
||||
|
||||
|
||||
class Dropdown(InputComponent):
|
||||
"""
|
||||
@ -647,16 +588,6 @@ class Dropdown(InputComponent):
|
||||
scores.insert(self.choices.index(x), None)
|
||||
return scores
|
||||
|
||||
def embed(self, x):
    """
    Embeds the selected dropdown option as a one-hot list with one float
    per entry of `self.choices`. With type "value" x is compared to each
    choice; with type "index" x is compared to each position.
    Any other type raises ValueError.
    """
    if self.type == "value":
        keys = self.choices
    elif self.type == "index":
        keys = range(len(self.choices))
    else:
        raise ValueError("Unknown type: " + str(self.type) +
                         ". Please choose from: 'value', 'index'.")
    one_hot = []
    for key in keys:
        one_hot.append(float(key == x))
    return one_hot
|
||||
|
||||
|
||||
class Image(InputComponent):
|
||||
"""
|
||||
Component creates an image upload box with editing capabilities.
|
||||
@ -822,20 +753,6 @@ class Image(InputComponent):
|
||||
output_scores = (output_scores - min_val) / (max_val - min_val)
|
||||
return output_scores.tolist()
|
||||
|
||||
def embed(self, x):
    """
    Embeds an image as the flattened pixel array of a resized-and-cropped copy.
    The target size is `self.shape`, or (100, 100) when no shape is set.
    x is interpreted according to `self.type`: a PIL image ("pil"), a numpy
    array ("numpy") or something PIL.Image.open accepts ("file").
    Any other type raises ValueError.
    """
    target = self.shape if self.shape is not None else (100, 100)
    kind = self.type
    if kind == "pil":
        image = x
    elif kind == "numpy":
        image = PIL.Image.fromarray(x)
    elif kind == "file":
        image = PIL.Image.open(x)
    else:
        raise ValueError("Unknown type: " + str(self.type) +
                         ". Please choose from: 'numpy', 'pil', 'file'.")
    size = (target[0], target[1])
    resized = processing_utils.resize_and_crop(image, size)
    return np.asarray(resized).flatten()
|
||||
|
||||
def save_flagged(self, dir, label, data, encryption_key):
|
||||
"""
|
||||
Returns: (str) path to image file
|
||||
@ -1109,9 +1026,6 @@ class File(InputComponent):
|
||||
else:
|
||||
return [process_single_file(f) for f in x]
|
||||
|
||||
def embed(self, x):
    """
    Embeddings are not available for File inputs; always raises NotImplementedError.
    """
    message = "File doesn't currently support embeddings"
    raise NotImplementedError(message)
|
||||
|
||||
def save_flagged(self, dir, label, data, encryption_key):
|
||||
"""
|
||||
Returns: (str) path to file
|
||||
@ -1191,10 +1105,6 @@ class Dataframe(InputComponent):
|
||||
raise ValueError("Unknown type: " + str(self.type) +
|
||||
". Please choose from: 'pandas', 'numpy', 'array'.")
|
||||
|
||||
def embed(self, x):
    """
    Embeddings are not available for Dataframe inputs; always raises NotImplementedError.
    """
    message = "DataFrame doesn't currently support embeddings"
    raise NotImplementedError(message)
|
||||
|
||||
def save_flagged(self, dir, label, data, encryption_key):
|
||||
"""
|
||||
Returns: (List[List[Union[str, float]]]) 2D array
|
||||
@ -1250,10 +1160,6 @@ class Timeseries(InputComponent):
|
||||
dataframe = dataframe.loc[dataframe[self.x or 0] <= x["range"][1]]
|
||||
return dataframe
|
||||
|
||||
def embed(self, x):
    """
    Embeddings are not available for Timeseries inputs; always raises NotImplementedError.
    """
    # The message previously named "DataFrame" (copied from the Dataframe
    # component), which misreported which component lacks support.
    raise NotImplementedError(
        "Timeseries doesn't currently support embeddings")
|
||||
|
||||
def save_flagged(self, dir, label, data, encryption_key):
|
||||
"""
|
||||
Returns: (List[List[Union[str, float]]]) 2D array
|
||||
|
@ -20,7 +20,6 @@ import requests
|
||||
import sys
|
||||
import csv
|
||||
import logging
|
||||
from gradio.embeddings import calculate_similarity, fit_pca_to_embeddings, transform_with_pca
|
||||
from gradio.tunneling import create_tunnel
|
||||
from gradio import encryptor
|
||||
from gradio import queue
|
||||
@ -209,66 +208,6 @@ def log_feature_analytics(feature):
|
||||
pass # do not push analytics if no network
|
||||
|
||||
|
||||
@app.route("/api/score_similarity/", methods=["POST"])
@login_check
def score_similarity():
    """
    Scores the posted input against every example of the interface.
    Reads the raw inputs from the "data" field of the JSON body, preprocesses
    and embeds them, does the same for each example, and responds with
    {"data": [...]} holding one similarity score per example.
    """
    payload = request.json["data"]

    processed_input = []
    for position, component in enumerate(app.interface.input_components):
        processed_input.append(component.preprocess(payload[position]))
    input_embedding = app.interface.embed(processed_input)

    scores = []
    for example_row in app.interface.examples:
        processed_example = []
        for component, raw_value in zip(app.interface.input_components, example_row):
            processed_example.append(
                component.preprocess(component.preprocess_example(raw_value)))
        example_embedding = app.interface.embed(processed_example)
        scores.append(calculate_similarity(input_embedding, example_embedding))

    log_feature_analytics('score_similarity')
    return jsonify({"data": scores})
|
||||
|
||||
|
||||
@app.route("/api/view_embeddings/", methods=["POST"])
@login_check
def view_embeddings():
    """
    Fits a 2D PCA projection over the example embeddings (plus the posted
    sample, when the JSON body has a "data" field) and returns the points.
    The fitted model is stored on `app.pca_model`.
    Response: {"sample_embedding_2d": [...], "example_embeddings_2d": [...]}.
    """
    sample_embedding = []
    if "data" in request.json:
        payload = request.json["data"]
        processed_input = []
        for position, component in enumerate(app.interface.input_components):
            processed_input.append(component.preprocess(payload[position]))
        sample_embedding.append(app.interface.embed(processed_input))

    example_embeddings = []
    for example_row in app.interface.examples:
        processed_example = []
        for component, raw_value in zip(app.interface.input_components, example_row):
            processed_example.append(
                component.preprocess(component.preprocess_example(raw_value)))
        example_embeddings.append(app.interface.embed(processed_example))

    # The sample (if any) comes first, so the projected list splits at its length.
    n_samples = len(sample_embedding)
    pca_model, embeddings_2d = fit_pca_to_embeddings(
        sample_embedding + example_embeddings)
    app.pca_model = pca_model
    log_feature_analytics('view_embeddings')
    return jsonify({
        "sample_embedding_2d": embeddings_2d[:n_samples],
        "example_embeddings_2d": embeddings_2d[n_samples:],
    })
|
||||
|
||||
|
||||
@app.route("/api/update_embeddings/", methods=["POST"])
@login_check
def update_embeddings():
    """
    Projects the posted sample to 2D using the model stored on `app.pca_model`.
    When the JSON body has no "data" field the returned list is empty.
    Response: {"sample_embedding_2d": [...]}.
    """
    sample_embedding = []
    sample_embedding_2d = []
    if "data" in request.json:
        payload = request.json["data"]
        processed_input = []
        for position, component in enumerate(app.interface.input_components):
            processed_input.append(component.preprocess(payload[position]))
        sample_embedding.append(app.interface.embed(processed_input))
        sample_embedding_2d = transform_with_pca(
            app.pca_model, sample_embedding)

    return jsonify({"sample_embedding_2d": sample_embedding_2d})
|
||||
|
||||
|
||||
def flag_data(input_data, output_data, flag_option=None, flag_index=None, username=None):
|
||||
flag_path = os.path.join(app.cwd, app.interface.flagging_dir)
|
||||
log_fp = "{}/log.csv".format(flag_path)
|
||||
|
Loading…
Reference in New Issue
Block a user