From 943cdd5f353c33103afd6e401cbc495935bff287 Mon Sep 17 00:00:00 2001
From: Freddy Boulton <alfonsoboulton@gmail.com>
Date: Tue, 25 Oct 2022 11:15:18 -0400
Subject: [PATCH] Add dashboard demo to Demos tab (#2508)

* Add demo

* Changelog

* Undo

* Fix formatting
---
 CHANGELOG.md                           |   2 +-
 demo/dashboard/DESCRIPTION.md          |   1 +
 demo/dashboard/helpers.py              | 167 +++++++++++++++++++++++++
 demo/dashboard/requirements.txt        |   1 +
 demo/dashboard/run.py                  |  73 +++++++++++
 website/homepage/src/demos/__init__.py |   4 +
 6 files changed, 247 insertions(+), 1 deletion(-)
 create mode 100644 demo/dashboard/DESCRIPTION.md
 create mode 100644 demo/dashboard/helpers.py
 create mode 100644 demo/dashboard/requirements.txt
 create mode 100644 demo/dashboard/run.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index bfd6e9eb8d..07bd5a725c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -52,7 +52,7 @@ inference time of 80 seconds).
 * Fixes issue where plotly animations, interactivity, titles, legends, were not working properly. [@dawoodkhan82](https://github.com/dawoodkhan82) in [PR 2486](https://github.com/gradio-app/gradio/pull/2486)
 
 ## Documentation Changes:
-No changes to highlight.
+* Added an example interactive dashboard to the "Tabular Data & Plots" section of the Demos page by [@freddyaboulton](https://github.com/freddyaboulton) in [PR 2508](https://github.com/gradio-app/gradio/pull/2508)
 
 ## Testing and Infrastructure Changes:
 No changes to highlight.
diff --git a/demo/dashboard/DESCRIPTION.md b/demo/dashboard/DESCRIPTION.md
new file mode 100644
index 0000000000..6bd25eec22
--- /dev/null
+++ b/demo/dashboard/DESCRIPTION.md
@@ -0,0 +1 @@
+This demo shows how you can build an interactive dashboard with Gradio. Select one or more Python libraries on the left-hand side, choose the metrics you'd like to see plotted over time on the right-hand side, and then click "Fetch". Data is pulled from Hugging Face Hub datasets.
\ No newline at end of file
diff --git a/demo/dashboard/helpers.py b/demo/dashboard/helpers.py
new file mode 100644
index 0000000000..ca025d4111
--- /dev/null
+++ b/demo/dashboard/helpers.py
@@ -0,0 +1,167 @@
+import collections
+from datetime import datetime
+
+from datasets import DatasetDict, load_dataset
+import numpy as np
+
+# Splits pulled from the Hub at import time, each sorted on its own date column.
+datasets = dict(
+    stars=load_dataset("open-source-metrics/stars").sort('dates'),
+    issues=load_dataset("open-source-metrics/issues").sort('dates'),
+    pip=load_dataset("open-source-metrics/pip").sort('day'),
+)
+val = 0  # counter shared with the map callbacks below through `global`
+
+
+def _range(e):
+    """Tag row `e` with the current counter value ('range') and a week number ('week')."""
+    global val
+    e['range'] = val
+    val += 1
+
+    # Whole 7-day spans between the row's timestamp and a fixed reference one second past the epoch.
+    current = datetime.strptime(e['dates'], "%Y-%m-%dT%H:%M:%SZ")
+    elapsed = abs(current - datetime.fromtimestamp(1))
+    e['week'] = elapsed.days // 7
+    return e
+
+
+def _ignore_org_members(e):
+    """Store the current counter in 'range_non_org', then count this row unless its author is a 'MEMBER'."""
+    global val
+    e['range_non_org'] = val
+    is_member = e['type']['authorAssociation'] == 'MEMBER'
+    if not is_member:
+        val += 1
+    return e
+
+stars = {}  # library name -> stars split annotated by _range
+for k, v in datasets['stars'].items():
+    stars[k] = v.map(_range)
+    val = 0  # reset the shared counter so the next split starts counting from zero
+# Issues get two passes: _range first, then _ignore_org_members on the result.
+issues = {}
+for k, v in datasets['issues'].items():
+    issues[k] = v.map(_range)
+    val = 0
+    issues[k] = issues[k].map(_ignore_org_members)
+    val = 0
+# Swap the annotated splits back into the shared `datasets` mapping.
+datasets['stars'] = DatasetDict(**stars)
+datasets['issues'] = DatasetDict(**issues)
+
+
+def link_values(library_names, returned_values):
+    """Forward-fill, in place, each library's missing entries with its last seen value (None if none yet)."""
+    last_seen = dict.fromkeys(library_names)  # every library starts with no known value
+    for name in library_names:
+        for row in returned_values.values():
+            if name in row:
+                last_seen[name] = row[name]
+            else:
+                row[name] = last_seen[name]
+    return returned_values
+
+
+def running_mean(x, N, total_length=-1):
+    """Return the N-wide moving average of `x`, left-padded with max(total_length - len(sums), 0) zeros."""
+    sums = np.cumsum(np.insert(x, 0, 0))  # prefix sums with a leading 0, so window sums are differences
+    return np.pad(sums[N:] - sums[:-N], (max(total_length - len(sums), 0), 0)) / float(N)
+
+
+def retrieve_pip_installs(library_names, cummulated):
+    """Collect daily pip download counts for the requested libraries.
+
+    With `cummulated` truthy, downloads of all requested libraries are summed per
+    day into one 'Cumulated' series; otherwise each library keeps its own series,
+    with None on days where it has no row. Returns {series name: values} plus a
+    'day' key listing the sorted days the values line up with.
+    """
+    per_day = {}
+    for name in library_names:
+        for row in datasets['pip'][name]:
+            day, downloads = row['day'], row['num_downloads']
+            if not cummulated:
+                per_day.setdefault(day, {})[name] = downloads
+            elif day in per_day:
+                per_day[day]['Cumulated'] += downloads
+            else:
+                per_day[day] = {'Cumulated': downloads}
+
+    if cummulated:
+        library_names = ['Cumulated']
+    else:
+        for name in library_names:
+            for counts in per_day.values():
+                if name not in counts:
+                    counts[name] = None
+
+    ordered = collections.OrderedDict(sorted(per_day.items()))
+    output = {series: [counts[series] for counts in ordered.values()] for series in library_names}
+    output['day'] = list(ordered.keys())
+    return output
+
+
+def retrieve_stars(libraries, week_over_week):
+    """Build the star-count series for `libraries`, thinned to on the order of 100 points.
+
+    With `week_over_week` truthy, rows whose 'week' equals that of the previously
+    kept row (initially the first row's week) are skipped and each value is the
+    increase in 'range' since the previously kept row; otherwise each value is the
+    row's 'range'. Returns {library: values} plus 'day' dates, in descending order.
+    """
+    returned_values = {}
+    for library_name in libraries:
+        dataset = datasets['stars'][library_name]
+        last_value = 0
+        last_week = dataset[0]['week']
+        for i in dataset:
+            if week_over_week and last_week == i['week']:
+                continue
+            returned_values.setdefault(i['dates'], {})[library_name] = i['range'] - last_value
+            last_value = i['range'] if week_over_week else 0
+            last_week = i['week']
+
+    returned_values = collections.OrderedDict(sorted(returned_values.items()))
+    returned_values = link_values(libraries, returned_values)
+    output = {l: [k[l] for k in returned_values.values()][::-1] for l in libraries}
+    output['day'] = list(returned_values.keys())[::-1]
+
+    # Keep every `step`-th point; clamp to 1 so series shorter than 100 points don't hit `i % 0`.
+    step = max(len(output['day']) // 100, 1)
+    return {k: value[::step] for k, value in output.items()}
+
+
+def retrieve_issues(libraries, exclude_org_members, week_over_week):
+    """Build the issue-activity series for `libraries`, thinned to on the order of 100 points.
+
+    Values come from the 'range_non_org' column when `exclude_org_members` is
+    truthy and from 'range' otherwise. With `week_over_week` truthy, rows whose
+    'week' equals that of the previously kept row (initially the first row's
+    week) are skipped and each value is the increase since the previously kept
+    row. Returns {library: values} plus a 'day' key with the matching dates, in
+    descending order.
+    """
+    returned_values = {}
+    range_id = 'range_non_org' if exclude_org_members else 'range'
+
+    for library_name in libraries:
+        dataset = datasets['issues'][library_name]
+        last_value = 0
+        last_week = dataset[0]['week']
+        for i in dataset:
+            if week_over_week and last_week == i['week']:
+                continue
+            # A later library may share a date with an earlier one, so merge into the existing row.
+            returned_values.setdefault(i['dates'], {})[library_name] = i[range_id] - last_value
+            last_value = i[range_id] if week_over_week else 0
+            last_week = i['week']
+
+    returned_values = collections.OrderedDict(sorted(returned_values.items()))
+    returned_values = link_values(libraries, returned_values)
+    output = {l: [k[l] for k in returned_values.values()][::-1] for l in libraries}
+    output['day'] = list(returned_values.keys())[::-1]
+
+    # Keep every `step`-th point; clamp to 1 so series shorter than 100 points don't hit `i % 0`.
+    step = max(len(output['day']) // 100, 1)
+    return {k: value[::step] for k, value in output.items()}
diff --git a/demo/dashboard/requirements.txt b/demo/dashboard/requirements.txt
new file mode 100644
index 0000000000..d42d0ad03b
--- /dev/null
+++ b/demo/dashboard/requirements.txt
@@ -0,0 +1 @@
+plotly
\ No newline at end of file
diff --git a/demo/dashboard/run.py b/demo/dashboard/run.py
new file mode 100644
index 0000000000..24073040b1
--- /dev/null
+++ b/demo/dashboard/run.py
@@ -0,0 +1,73 @@
+import gradio as gr
+import pandas as pd
+import plotly.express as px
+from helpers import *
+
+
+LIBRARIES = ["accelerate", "datasets", "diffusers", "evaluate", "gradio", "hub_docs",  # checkbox choices; the
+             "huggingface_hub", "optimum", "pytorch_image_models", "tokenizers", "transformers"]  # helpers index datasets by these
+
+
+def create_pip_plot(libraries, pip_choices):
+    """Return a gr.update showing the pip-installs line plot, or hiding it unless "Pip" is ticked."""
+    if "Pip" not in pip_choices:
+        return gr.update(visible=False)
+    cumulated = "Cumulated" in pip_choices
+    long_df = pd.DataFrame(retrieve_pip_installs(libraries, cumulated)).melt(id_vars="day")
+    figure = px.line(long_df, x="day", y="value", color="variable", title="Pip installs")
+    figure.update_layout(legend=dict(x=0.5, y=0.99), title_x=0.5, legend_title_text="")
+    return gr.update(value=figure, visible=True)
+
+
+def create_star_plot(libraries, star_choices):
+    """Return a gr.update showing the stargazers line plot, or hiding it unless "Stars" is ticked."""
+    if "Stars" not in star_choices:
+        return gr.update(visible=False)
+    weekly = "Week over Week" in star_choices
+    long_df = pd.DataFrame(retrieve_stars(libraries, weekly)).melt(id_vars="day")
+    figure = px.line(long_df, x="day", y="value", color="variable", title="Number of stargazers")
+    figure.update_layout(legend=dict(x=0.5, y=0.99), title_x=0.5, legend_title_text="")
+    return gr.update(value=figure, visible=True)
+
+
+def create_issue_plot(libraries, issue_choices):
+    """Return a gr.update showing the issues line plot, or hiding it unless "Issue" is ticked."""
+    if "Issue" not in issue_choices:
+        return gr.update(visible=False)
+    # The two remaining checkbox labels map directly onto retrieve_issues' keyword flags.
+    flags = dict(exclude_org_members="Exclude org members" in issue_choices,
+                 week_over_week="Week over Week" in issue_choices)
+    long_df = pd.DataFrame(retrieve_issues(libraries, **flags)).melt(id_vars="day")
+    figure = px.line(long_df, x="day", y="value", color="variable",
+                     title="Cumulated number of issues, PRs, and comments")
+    figure.update_layout(legend=dict(x=0.5, y=0.99), title_x=0.5, legend_title_text="")
+    return gr.update(value=figure, visible=True)
+
+
+with gr.Blocks() as demo:
+    # Top row: library picker in the first column, per-plot options in the second.
+    with gr.Row():
+        with gr.Column(), gr.Box():
+            gr.Markdown("## Select libraries to display")
+            libraries = gr.CheckboxGroup(choices=LIBRARIES, label="")
+        with gr.Column(), gr.Box():
+            gr.Markdown("## Select graphs to display")
+            pip = gr.CheckboxGroup(choices=["Pip", "Cumulated"], label="")
+            stars = gr.CheckboxGroup(choices=["Stars", "Week over Week"], label="")
+            # Label must match the "Week over Week" string create_issue_plot tests for.
+            issues = gr.CheckboxGroup(choices=["Issue", "Exclude org members", "Week over Week"], label="")
+    with gr.Row():
+        fetch = gr.Button(value="Fetch")
+    with gr.Row():
+        with gr.Column():
+            pip_plot = gr.Plot(visible=False)
+            star_plot = gr.Plot(visible=False)
+            issue_plot = gr.Plot(visible=False)
+    # One click on "Fetch" triggers three independent callbacks, one per (initially hidden) plot.
+    fetch.click(create_pip_plot, inputs=[libraries, pip], outputs=pip_plot)
+    fetch.click(create_star_plot, inputs=[libraries, stars], outputs=star_plot)
+    fetch.click(create_issue_plot, inputs=[libraries, issues], outputs=issue_plot)
+
+
+if __name__ == "__main__":  # launch only when executed as a script, not when imported
+    demo.launch()
\ No newline at end of file
diff --git a/website/homepage/src/demos/__init__.py b/website/homepage/src/demos/__init__.py
index a0abdf0e51..29790a0aed 100644
--- a/website/homepage/src/demos/__init__.py
+++ b/website/homepage/src/demos/__init__.py
@@ -77,6 +77,10 @@ demos_by_category = [
     {
         "category": "📈 Tabular Data & Plots",
         "demos": [
+            {
+                "name": "Interactive Dashboard",
+                "dir": "dashboard"
+            },
             {
                 "name": "Outbreak Forecast", 
                 "dir": "outbreak_forecast",