Queue concurrency count (#6172)
* changes
* add changeset
* changes
* changes
* changes
* Update client/python/test/conftest.py

Co-authored-by: Abubakar Abid <abubakar@huggingface.co>

* changes
* changes
* changes

---------

Co-authored-by: gradio-pr-bot <gradio-pr-bot@users.noreply.github.com>
Co-authored-by: Abubakar Abid <abubakar@huggingface.co>
parent 59f5a4e30e
commit 79c8156ebb
.changeset/shaky-feet-boil.md (new file, 5 lines added)
@@ -0,0 +1,5 @@
+---
+"gradio": minor
+---
+
+feat:Queue concurrency count
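For orientation before the diff itself: this change retires the global `queue(concurrency_count=...)` knob in favor of per-event `concurrency_limit` and `concurrency_id` arguments. A minimal sketch of the new usage (the handler and button names are illustrative, not from this commit):

```python
import gradio as gr

def generate(prompt):
    return prompt[::-1]  # stand-in for an expensive model call

with gr.Blocks() as demo:
    prompt = gr.Textbox()
    out = gr.Textbox()
    btn_a = gr.Button("Generate A")
    btn_b = gr.Button("Generate B")
    # Both buttons share the "gpu" concurrency group, so at most 2 of
    # these events run at once across the two listeners combined.
    btn_a.click(generate, prompt, out, concurrency_limit=2, concurrency_id="gpu")
    btn_b.click(generate, prompt, out, concurrency_id="gpu")

demo.queue(max_size=20)  # queue() no longer takes concurrency_count
demo.launch()
```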
@@ -38,7 +38,7 @@ def calculator_demo():
             [0, "subtract", 1.2],
         ],
     )
-    return demo.queue()
+    return demo


 @pytest.fixture
@@ -48,7 +48,7 @@ def state_demo():
         ["textbox", "state"],
         ["textbox", "state"],
     )
-    return demo.queue()
+    return demo


 @pytest.fixture
@@ -81,7 +81,7 @@ def increment_demo():
             api_name=False,
         )

-    return demo.queue()
+    return demo


 @pytest.fixture
@@ -92,7 +92,7 @@ def progress_demo():
         time.sleep(0.1)
         return x

-    return gr.Interface(my_function, gr.Textbox(), gr.Textbox()).queue()
+    return gr.Interface(my_function, gr.Textbox(), gr.Textbox())


 @pytest.fixture
@@ -126,7 +126,7 @@ def cancel_from_client_demo():
         btn2 = gr.Button(value="Long Process")
         btn2.click(long_process, None, num, api_name="long")

-    return demo.queue(concurrency_count=40)
+    return demo


 @pytest.fixture
@@ -177,7 +177,7 @@ def count_generator_demo():
         count_btn.click(count, num, out)
         list_btn.click(show, num, out)

-    return demo.queue()
+    return demo


 @pytest.fixture
@@ -202,7 +202,7 @@ def count_generator_no_api():
         count_btn.click(count, num, out, api_name=False)
         list_btn.click(show, num, out, api_name=False)

-    return demo.queue()
+    return demo


 @pytest.fixture
@@ -228,7 +228,7 @@ def count_generator_demo_exception():

         count_btn.click(count, num, out, api_name="count")
         count_forever.click(show, num, out, api_name="count_forever", every=3)
-    return demo.queue()
+    return demo


 @pytest.fixture
@@ -258,7 +258,6 @@ def stateful_chatbot():

         msg.submit(respond, [msg, st, chatbot], [msg, chatbot], api_name="submit")
         clear.click(lambda: None, None, chatbot, queue=False)
-    demo.queue()
     return demo

@@ -346,7 +345,7 @@ def stream_audio():
         fn=_stream_audio,
         inputs=gr.Audio(type="filepath", label="Audio file to stream"),
         outputs=gr.Audio(autoplay=True, streaming=True),
-    ).queue()
+    )


 @pytest.fixture
@@ -1 +1 @@
-{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: cancel_events"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["import time\n", "import gradio as gr\n", "\n", "\n", "def fake_diffusion(steps):\n", " for i in range(steps):\n", " print(f\"Current step: {i}\")\n", " time.sleep(1)\n", " yield str(i)\n", "\n", "\n", "def long_prediction(*args, **kwargs):\n", " time.sleep(10)\n", " return 42\n", "\n", "\n", "with gr.Blocks() as demo:\n", " with gr.Row():\n", " with gr.Column():\n", " n = gr.Slider(1, 10, value=9, step=1, label=\"Number Steps\")\n", " run = gr.Button(value=\"Start Iterating\")\n", " output = gr.Textbox(label=\"Iterative Output\")\n", " stop = gr.Button(value=\"Stop Iterating\")\n", " with gr.Column():\n", " textbox = gr.Textbox(label=\"Prompt\")\n", " prediction = gr.Number(label=\"Expensive Calculation\")\n", " run_pred = gr.Button(value=\"Run Expensive Calculation\")\n", " with gr.Column():\n", " cancel_on_change = gr.Textbox(label=\"Cancel Iteration and Expensive Calculation on Change\")\n", " cancel_on_submit = gr.Textbox(label=\"Cancel Iteration and Expensive Calculation on Submit\")\n", " echo = gr.Textbox(label=\"Echo\")\n", " with gr.Row():\n", " with gr.Column():\n", " image = gr.Image(sources=[\"webcam\"], tool=\"editor\", label=\"Cancel on edit\", interactive=True)\n", " with gr.Column():\n", " video = gr.Video(sources=[\"webcam\"], label=\"Cancel on play\", interactive=True)\n", "\n", " click_event = run.click(fake_diffusion, n, output)\n", " stop.click(fn=None, inputs=None, outputs=None, cancels=[click_event])\n", " pred_event = run_pred.click(fn=long_prediction, inputs=[textbox], outputs=prediction)\n", "\n", " cancel_on_change.change(None, None, None, cancels=[click_event, pred_event])\n", " cancel_on_submit.submit(lambda s: s, cancel_on_submit, echo, cancels=[click_event, pred_event])\n", " image.edit(None, None, None, cancels=[click_event, pred_event])\n", " video.play(None, None, None, cancels=[click_event, pred_event])\n", "\n", " demo.queue(concurrency_count=2, max_size=20)\n", "\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
+{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: cancel_events"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["import time\n", "import gradio as gr\n", "\n", "\n", "def fake_diffusion(steps):\n", " for i in range(steps):\n", " print(f\"Current step: {i}\")\n", " time.sleep(1)\n", " yield str(i)\n", "\n", "\n", "def long_prediction(*args, **kwargs):\n", " time.sleep(10)\n", " return 42\n", "\n", "\n", "with gr.Blocks() as demo:\n", " with gr.Row():\n", " with gr.Column():\n", " n = gr.Slider(1, 10, value=9, step=1, label=\"Number Steps\")\n", " run = gr.Button(value=\"Start Iterating\")\n", " output = gr.Textbox(label=\"Iterative Output\")\n", " stop = gr.Button(value=\"Stop Iterating\")\n", " with gr.Column():\n", " textbox = gr.Textbox(label=\"Prompt\")\n", " prediction = gr.Number(label=\"Expensive Calculation\")\n", " run_pred = gr.Button(value=\"Run Expensive Calculation\")\n", " with gr.Column():\n", " cancel_on_change = gr.Textbox(label=\"Cancel Iteration and Expensive Calculation on Change\")\n", " cancel_on_submit = gr.Textbox(label=\"Cancel Iteration and Expensive Calculation on Submit\")\n", " echo = gr.Textbox(label=\"Echo\")\n", " with gr.Row():\n", " with gr.Column():\n", " image = gr.Image(sources=[\"webcam\"], tool=\"editor\", label=\"Cancel on edit\", interactive=True)\n", " with gr.Column():\n", " video = gr.Video(sources=[\"webcam\"], label=\"Cancel on play\", interactive=True)\n", "\n", " click_event = run.click(fake_diffusion, n, output)\n", " stop.click(fn=None, inputs=None, outputs=None, cancels=[click_event])\n", " pred_event = run_pred.click(fn=long_prediction, inputs=[textbox], outputs=prediction)\n", "\n", " cancel_on_change.change(None, None, None, cancels=[click_event, pred_event])\n", " cancel_on_submit.submit(lambda s: s, cancel_on_submit, echo, cancels=[click_event, pred_event])\n", " image.edit(None, None, None, cancels=[click_event, pred_event])\n", " video.play(None, None, None, cancels=[click_event, pred_event])\n", "\n", " demo.queue(max_size=20)\n", "\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
@@ -44,7 +44,7 @@ with gr.Blocks() as demo:
     image.edit(None, None, None, cancels=[click_event, pred_event])
     video.play(None, None, None, cancels=[click_event, pred_event])

-    demo.queue(concurrency_count=2, max_size=20)
+    demo.queue(max_size=20)

 if __name__ == "__main__":
     demo.launch()
@@ -1 +1 @@
-{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: concurrency_with_queue"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "import time\n", "\n", "\n", "def say_hello(name):\n", " time.sleep(5)\n", " return f\"Hello {name}!\"\n", "\n", "\n", "with gr.Blocks() as demo:\n", " inp = gr.Textbox()\n", " outp = gr.Textbox()\n", " button = gr.Button()\n", " button.click(say_hello, inp, outp)\n", "\n", " demo.queue(concurrency_count=41).launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
+{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: concurrency_with_queue"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "import time\n", "\n", "\n", "def say_hello(name):\n", " time.sleep(5)\n", " return f\"Hello {name}!\"\n", "\n", "\n", "with gr.Blocks() as demo:\n", " inp = gr.Textbox()\n", " outp = gr.Textbox()\n", " button = gr.Button()\n", " button.click(say_hello, inp, outp)\n", "\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
@@ -13,4 +13,4 @@ with gr.Blocks() as demo:
     button = gr.Button()
     button.click(say_hello, inp, outp)

-    demo.queue(concurrency_count=41).launch()
+    demo.launch()
@@ -1 +1 @@
-{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: progress"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio tqdm datasets"]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "import random\n", "import time\n", "import tqdm\n", "from datasets import load_dataset\n", "import shutil\n", "from uuid import uuid4\n", "\n", "with gr.Blocks() as demo:\n", " with gr.Row():\n", " text = gr.Textbox()\n", " textb = gr.Textbox()\n", " with gr.Row():\n", " load_set_btn = gr.Button(\"Load Set\")\n", " load_nested_set_btn = gr.Button(\"Load Nested Set\")\n", " load_random_btn = gr.Button(\"Load Random\")\n", " clean_imgs_btn = gr.Button(\"Clean Images\")\n", " wait_btn = gr.Button(\"Wait\")\n", " do_all_btn = gr.Button(\"Do All\")\n", " track_tqdm_btn = gr.Button(\"Bind TQDM\")\n", " bind_internal_tqdm_btn = gr.Button(\"Bind Internal TQDM\")\n", "\n", " text2 = gr.Textbox()\n", "\n", " # track list\n", " def load_set(text, text2, progress=gr.Progress()):\n", " imgs = [None] * 24\n", " for img in progress.tqdm(imgs, desc=\"Loading from list\"):\n", " time.sleep(0.1)\n", " return \"done\"\n", " load_set_btn.click(load_set, [text, textb], text2)\n", "\n", " # track nested list\n", " def load_nested_set(text, text2, progress=gr.Progress()):\n", " imgs = [[None] * 8] * 3\n", " for img_set in progress.tqdm(imgs, desc=\"Nested list\"):\n", " time.sleep(2)\n", " for img in progress.tqdm(img_set, desc=\"inner list\"):\n", " time.sleep(0.1)\n", " return \"done\"\n", " load_nested_set_btn.click(load_nested_set, [text, textb], text2)\n", "\n", " # track iterable of unknown length\n", " def load_random(data, progress=gr.Progress()):\n", " def yielder():\n", " for i in range(0, random.randint(15, 20)):\n", " time.sleep(0.1)\n", " yield None\n", " for img in progress.tqdm(yielder()):\n", " pass\n", " return \"done\"\n", " load_random_btn.click(load_random, {text, textb}, text2)\n", " \n", " # manual progress\n", " def clean_imgs(text, progress=gr.Progress()):\n", " progress(0.2, desc=\"Collecting Images\")\n", " time.sleep(1)\n", " progress(0.5, desc=\"Cleaning Images\")\n", " time.sleep(1.5)\n", " progress(0.8, desc=\"Sending Images\")\n", " time.sleep(1.5)\n", " return \"done\"\n", " clean_imgs_btn.click(clean_imgs, text, text2)\n", "\n", " # no progress\n", " def wait(text):\n", " time.sleep(4)\n", " return \"done\"\n", " wait_btn.click(wait, text, text2)\n", "\n", " # multiple progressions\n", " def do_all(data, progress=gr.Progress()):\n", " load_set(data[text], data[textb], progress)\n", " load_random(data, progress)\n", " clean_imgs(data[text], progress)\n", " progress(None)\n", " wait(text)\n", " return \"done\"\n", " do_all_btn.click(do_all, {text, textb}, text2)\n", "\n", " def track_tqdm(data, progress=gr.Progress(track_tqdm=True)):\n", " for i in tqdm.tqdm(range(5), desc=\"outer\"):\n", " for j in tqdm.tqdm(range(4), desc=\"inner\"):\n", " time.sleep(1)\n", " return \"done\"\n", " track_tqdm_btn.click(track_tqdm, {text, textb}, text2)\n", "\n", " def bind_internal_tqdm(data, progress=gr.Progress(track_tqdm=True)):\n", " outdir = \"__tmp/\" + str(uuid4())\n", " load_dataset(\"beans\", split=\"train\", cache_dir=outdir)\n", " shutil.rmtree(outdir)\n", " return \"done\"\n", " bind_internal_tqdm_btn.click(bind_internal_tqdm, {text, textb}, text2)\n", "\n", "\n", "if __name__ == \"__main__\":\n", " demo.queue(concurrency_count=20).launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
+{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: progress"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio tqdm datasets"]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "import random\n", "import time\n", "import tqdm\n", "from datasets import load_dataset\n", "import shutil\n", "from uuid import uuid4\n", "\n", "with gr.Blocks() as demo:\n", " with gr.Row():\n", " text = gr.Textbox()\n", " textb = gr.Textbox()\n", " with gr.Row():\n", " load_set_btn = gr.Button(\"Load Set\")\n", " load_nested_set_btn = gr.Button(\"Load Nested Set\")\n", " load_random_btn = gr.Button(\"Load Random\")\n", " clean_imgs_btn = gr.Button(\"Clean Images\")\n", " wait_btn = gr.Button(\"Wait\")\n", " do_all_btn = gr.Button(\"Do All\")\n", " track_tqdm_btn = gr.Button(\"Bind TQDM\")\n", " bind_internal_tqdm_btn = gr.Button(\"Bind Internal TQDM\")\n", "\n", " text2 = gr.Textbox()\n", "\n", " # track list\n", " def load_set(text, text2, progress=gr.Progress()):\n", " imgs = [None] * 24\n", " for img in progress.tqdm(imgs, desc=\"Loading from list\"):\n", " time.sleep(0.1)\n", " return \"done\"\n", " load_set_btn.click(load_set, [text, textb], text2)\n", "\n", " # track nested list\n", " def load_nested_set(text, text2, progress=gr.Progress()):\n", " imgs = [[None] * 8] * 3\n", " for img_set in progress.tqdm(imgs, desc=\"Nested list\"):\n", " time.sleep(2)\n", " for img in progress.tqdm(img_set, desc=\"inner list\"):\n", " time.sleep(0.1)\n", " return \"done\"\n", " load_nested_set_btn.click(load_nested_set, [text, textb], text2)\n", "\n", " # track iterable of unknown length\n", " def load_random(data, progress=gr.Progress()):\n", " def yielder():\n", " for i in range(0, random.randint(15, 20)):\n", " time.sleep(0.1)\n", " yield None\n", " for img in progress.tqdm(yielder()):\n", " pass\n", " return \"done\"\n", " load_random_btn.click(load_random, {text, textb}, text2)\n", " \n", " # manual progress\n", " def clean_imgs(text, progress=gr.Progress()):\n", " progress(0.2, desc=\"Collecting Images\")\n", " time.sleep(1)\n", " progress(0.5, desc=\"Cleaning Images\")\n", " time.sleep(1.5)\n", " progress(0.8, desc=\"Sending Images\")\n", " time.sleep(1.5)\n", " return \"done\"\n", " clean_imgs_btn.click(clean_imgs, text, text2)\n", "\n", " # no progress\n", " def wait(text):\n", " time.sleep(4)\n", " return \"done\"\n", " wait_btn.click(wait, text, text2)\n", "\n", " # multiple progressions\n", " def do_all(data, progress=gr.Progress()):\n", " load_set(data[text], data[textb], progress)\n", " load_random(data, progress)\n", " clean_imgs(data[text], progress)\n", " progress(None)\n", " wait(text)\n", " return \"done\"\n", " do_all_btn.click(do_all, {text, textb}, text2)\n", "\n", " def track_tqdm(data, progress=gr.Progress(track_tqdm=True)):\n", " for i in tqdm.tqdm(range(5), desc=\"outer\"):\n", " for j in tqdm.tqdm(range(4), desc=\"inner\"):\n", " time.sleep(1)\n", " return \"done\"\n", " track_tqdm_btn.click(track_tqdm, {text, textb}, text2)\n", "\n", " def bind_internal_tqdm(data, progress=gr.Progress(track_tqdm=True)):\n", " outdir = \"__tmp/\" + str(uuid4())\n", " load_dataset(\"beans\", split=\"train\", cache_dir=outdir)\n", " shutil.rmtree(outdir)\n", " return \"done\"\n", " bind_internal_tqdm_btn.click(bind_internal_tqdm, {text, textb}, text2)\n", "\n", "\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
@@ -94,4 +94,4 @@ with gr.Blocks() as demo:


 if __name__ == "__main__":
-    demo.queue(concurrency_count=20).launch()
+    demo.launch()
@@ -1 +1 @@
-{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: progress_component"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio tqdm"]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "import time \n", "\n", "def load_set(progress=gr.Progress()):\n", " imgs = [None] * 24\n", " for img in progress.tqdm(imgs, desc=\"Loading...\"):\n", " time.sleep(0.1)\n", " return \"Loaded\"\n", "\n", "\n", "with gr.Blocks() as demo:\n", " load = gr.Button(\"Load\")\n", " label = gr.Label(label=\"Loader\")\n", " load.click(load_set, outputs=label)\n", "\n", "demo.queue(concurrency_count=20).launch()"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
+{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: progress_component"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio tqdm"]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "import time \n", "\n", "def load_set(progress=gr.Progress()):\n", " imgs = [None] * 24\n", " for img in progress.tqdm(imgs, desc=\"Loading...\"):\n", " time.sleep(0.1)\n", " return \"Loaded\"\n", "\n", "\n", "with gr.Blocks() as demo:\n", " load = gr.Button(\"Load\")\n", " label = gr.Label(label=\"Loader\")\n", " load.click(load_set, outputs=label)\n", "\n", "demo.launch()"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
@@ -13,4 +13,4 @@ with gr.Blocks() as demo:
     label = gr.Label(label="Loader")
     load.click(load_set, outputs=label)

-demo.queue(concurrency_count=20).launch()
+demo.launch()
@@ -1 +1 @@
-{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: progress_simple"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "import time\n", "\n", "def slowly_reverse(word, progress=gr.Progress()):\n", " progress(0, desc=\"Starting\")\n", " time.sleep(1)\n", " progress(0.05)\n", " new_string = \"\"\n", " for letter in progress.tqdm(word, desc=\"Reversing\"):\n", " time.sleep(0.25)\n", " new_string = letter + new_string\n", " return new_string\n", "\n", "demo = gr.Interface(slowly_reverse, gr.Text(), gr.Text())\n", "\n", "if __name__ == \"__main__\":\n", " demo.queue(concurrency_count=10).launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
+{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: progress_simple"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "import time\n", "\n", "def slowly_reverse(word, progress=gr.Progress()):\n", " progress(0, desc=\"Starting\")\n", " time.sleep(1)\n", " progress(0.05)\n", " new_string = \"\"\n", " for letter in progress.tqdm(word, desc=\"Reversing\"):\n", " time.sleep(0.25)\n", " new_string = letter + new_string\n", " return new_string\n", "\n", "demo = gr.Interface(slowly_reverse, gr.Text(), gr.Text())\n", "\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
@@ -14,4 +14,4 @@ def slowly_reverse(word, progress=gr.Progress()):
 demo = gr.Interface(slowly_reverse, gr.Text(), gr.Text())

 if __name__ == "__main__":
-    demo.queue(concurrency_count=10).launch()
+    demo.launch()
@@ -65,7 +65,6 @@ from gradio.utils import (
     TupleNoPrint,
     check_function_inputs_match,
     component_or_layout_class,
-    concurrency_count_warning,
     get_cancel_function,
     get_continuous_fn,
     get_package_version,
@@ -329,6 +328,10 @@ class BlockFunction:
         preprocess: bool,
         postprocess: bool,
         inputs_as_dict: bool,
+        batch: bool = False,
+        max_batch_size: int = 4,
+        concurrency_limit: int | None = 1,
+        concurrency_id: str | None = None,
         tracks_progress: bool = False,
     ):
         self.fn = fn
@@ -337,6 +340,10 @@ class BlockFunction:
         self.preprocess = preprocess
         self.postprocess = postprocess
         self.tracks_progress = tracks_progress
+        self.concurrency_limit = concurrency_limit
+        self.concurrency_id = concurrency_id or id(fn)
+        self.batch = batch
+        self.max_batch_size = max_batch_size
         self.total_runtime = 0
         self.total_runs = 0
         self.inputs_as_dict = inputs_as_dict
@@ -779,6 +786,8 @@ class Blocks(BlockContext, BlocksEvents, metaclass=BlocksMeta):
         trigger_after: int | None = None,
         trigger_only_on_success: bool = False,
         trigger_mode: Literal["once", "multiple", "always_last"] | None = "once",
+        concurrency_limit: int | None = 1,
+        concurrency_id: str | None = None,
     ) -> tuple[dict[str, Any], int]:
         """
         Adds an event to the component's dependencies.
@@ -803,6 +812,8 @@ class Blocks(BlockContext, BlocksEvents, metaclass=BlocksMeta):
             trigger_after: if set, this event will be triggered after 'trigger_after' function index
             trigger_only_on_success: if True, this event will only be triggered if the previous event was successful (only applies if `trigger_after` is set)
             trigger_mode: If "once" (default for all events except `.change()`), does not allow any submissions while an event is pending. If "multiple", unlimited submissions are allowed while pending, and "always_last" (default for the `.change()` event) allows a second submission after the pending event is complete.
+            concurrency_limit: If set, this is the maximum number of instances of this event that can be running simultaneously. Extra events triggered by this listener will be queued. On Spaces, this is set to 1 by default.
+            concurrency_id: If set, this is the id of the concurrency group. Events with the same concurrency_id will be limited by the lowest set concurrency_limit.
         Returns: dependency information, dependency index
         """
         # Support for singular parameter
@@ -865,8 +876,12 @@ class Blocks(BlockContext, BlocksEvents, metaclass=BlocksMeta):
                 outputs,
                 preprocess,
                 postprocess,
-                inputs_as_dict,
-                progress_index is not None,
+                inputs_as_dict=inputs_as_dict,
+                concurrency_limit=concurrency_limit,
+                concurrency_id=concurrency_id,
+                batch=batch,
+                max_batch_size=max_batch_size,
+                tracks_progress=progress_index is not None,
             )
         )
         if api_name is not None and api_name is not False:
@@ -1586,19 +1601,17 @@ Received outputs:
             self.children = []
         return self

-    @concurrency_count_warning
     @document()
     def queue(
         self,
-        concurrency_count: int | None = None,
         status_update_rate: float | Literal["auto"] = "auto",
         api_open: bool | None = None,
         max_size: int | None = None,
+        **kwargs,
     ):
         """
-        By enabling the queue you can control the rate of processed requests, let users know their position in the queue, and set a limit on maximum number of events allowed.
+        By enabling the queue you can control when users know their position in the queue, and set a limit on the maximum number of events allowed.
         Parameters:
-            concurrency_count: Number of worker threads that will be processing requests from the queue concurrently. Default is 40 when running locally, and 1 in Spaces.
             status_update_rate: If "auto", Queue will send status estimations to all clients whenever a job is finished. Otherwise Queue will send status at regular intervals set by this parameter as the number of seconds.
             api_open: If True, the REST routes of the backend will be open, allowing requests made directly to those endpoints to skip the queue.
             max_size: The maximum number of events the queue will store at any given moment. If the queue is full, new events will not be added and a user will receive a message saying that the queue is full. If None, the queue size will be unlimited.
@@ -1613,19 +1626,22 @@ Received outputs:
             demo.queue(max_size=20)
             demo.launch()
         """
-        if concurrency_count is None:
-            concurrency_count = 1 if utils.get_space() else 40
+        if "concurrency_count" in kwargs:
+            raise DeprecationWarning(
+                "concurrency_count has been deprecated. Set the concurrency_limit directly on event listeners e.g. btn.click(fn, ..., concurrency_limit=10) or gr.Interface(concurrency_limit=10). If necessary, the total number of workers can be configured via `max_threads` in launch()."
+            )
+        if len(kwargs):
+            raise ValueError(f"Invalid arguments: {kwargs}")
         if api_open is not None:
             self.api_open = api_open
         if utils.is_zero_gpu_space():
-            concurrency_count = self.max_threads
             max_size = 1 if max_size is None else max_size
         self._queue = queueing.Queue(
             live_updates=status_update_rate == "auto",
-            concurrency_count=concurrency_count,
+            concurrency_count=self.max_threads,
             update_intervals=status_update_rate if status_update_rate != "auto" else 1,
             max_size=max_size,
             blocks_dependencies=self.dependencies,
+            block_fns=self.fns,
         )
         self.config = self.get_config_file()
         self.app = routes.App.create_app(self)
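In practice, the migration the deprecation message describes looks like the following sketch (the handler and limit values are illustrative, not from this commit):

```python
import gradio as gr

def slow_echo(text):
    return text

with gr.Blocks() as demo:
    inp, out = gr.Textbox(), gr.Textbox()
    btn = gr.Button("Run")
    # Old API: demo.queue(concurrency_count=10)  -- now raises DeprecationWarning.
    # New API: cap concurrency on the event listener itself.
    btn.click(slow_echo, inp, out, concurrency_limit=10)

demo.queue(max_size=20)
demo.launch()
```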
@@ -1692,7 +1708,7 @@ Received outputs:
             show_error: If True, any errors in the interface will be displayed in an alert modal and printed in the browser console log
             server_port: will start gradio app on this port (if available). Can be set by environment variable GRADIO_SERVER_PORT. If None, will search for an available port starting at 7860.
             server_name: to make app accessible on local network, set this to "0.0.0.0". Can be set by environment variable GRADIO_SERVER_NAME. If None, will use "127.0.0.1".
-            max_threads: the maximum number of total threads that the Gradio app can generate in parallel. The default is inherited from the starlette library (currently 40). Applies whether the queue is enabled or not. But if queuing is enabled, this parameter is increased to be at least the concurrency_count of the queue.
+            max_threads: the maximum number of total threads that the Gradio app can generate in parallel. The default is inherited from the starlette library (currently 40).
             width: The width in pixels of the iframe element containing the interface (used if inline=True)
             height: The height in pixels of the iframe element containing the interface (used if inline=True)
             favicon_path: If a path to a file (.png, .gif, or .ico) is provided, it will be used as the favicon for the web page.
@@ -1769,7 +1785,8 @@ Received outputs:
         self.validate_queue_settings()

         self.config = self.get_config_file()
-        self.max_threads = max(self._queue.max_thread_count, max_threads)
+        self.max_threads = max_threads
+        self._queue.max_thread_count = max_threads

         if self.is_running:
             if not isinstance(self.local_url, str):
@@ -206,6 +206,8 @@ class EventListener(str):
         cancels: dict[str, Any] | list[dict[str, Any]] | None = None,
         every: float | None = None,
         trigger_mode: Literal["once", "multiple", "always_last"] | None = None,
+        concurrency_limit: int | None = 1,
+        concurrency_id: str | None = None,
         _js: str | None = None,
     ) -> Dependency:
         """
@@ -224,6 +226,8 @@ class EventListener(str):
             cancels: A list of other events to cancel when this listener is triggered. For example, setting cancels=[click_event] will cancel the click_event, where click_event is the return value of another component's .click method. Functions that have not yet run (or generators that are iterating) will be cancelled, but functions that are currently running will be allowed to finish.
             every: Run this event 'every' number of seconds while the client connection is open. Interpreted in seconds. Queue must be enabled.
             trigger_mode: If "once" (default for all events except `.change()`), does not allow any submissions while an event is pending. If "multiple", unlimited submissions are allowed while pending, and "always_last" (default for the `.change()` event) allows a second submission after the pending event is complete.
+            concurrency_limit: If set, this is the maximum number of events that can be running simultaneously. Extra requests will be queued.
+            concurrency_id: If set, this is the id of the concurrency group. Events with the same concurrency_id will be limited by the lowest set concurrency_limit.
         """

         if fn == "decorator":
@@ -245,6 +249,8 @@ class EventListener(str):
             cancels,
             every,
             trigger_mode,
+            concurrency_limit,
+            concurrency_id,
             _js,
         )

@@ -302,6 +308,8 @@ class EventListener(str):
             if show_progress is not None
             else _show_progress,
             api_name=api_name,
+            concurrency_limit=concurrency_limit,
+            concurrency_id=concurrency_id,
             js=_js,
             queue=queue,
             batch=batch,
@@ -341,6 +349,8 @@ def on(
     postprocess: bool = True,
     cancels: dict[str, Any] | list[dict[str, Any]] | None = None,
     every: float | None = None,
+    concurrency_limit: int | None = 1,
+    concurrency_id: str | None = None,
     _js: str | None = None,
 ) -> Dependency:
     """
@@ -359,6 +369,8 @@ def on(
         postprocess: If False, will not run postprocessing of component data before returning 'fn' output to the browser.
         cancels: A list of other events to cancel when this listener is triggered. For example, setting cancels=[click_event] will cancel the click_event, where click_event is the return value of another component's .click method. Functions that have not yet run (or generators that are iterating) will be cancelled, but functions that are currently running will be allowed to finish.
         every: Run this event 'every' number of seconds while the client connection is open. Interpreted in seconds. Queue must be enabled.
+        concurrency_limit: If set, this is the maximum number of events that can be running simultaneously. Extra requests will be queued.
+        concurrency_id: If set, this is the id of the concurrency group. Events with the same concurrency_id will be limited by the lowest set concurrency_limit.
     """
     from gradio.components.base import Component

@@ -385,6 +397,8 @@ def on(
         postprocess=postprocess,
         cancels=cancels,
         every=every,
+        concurrency_limit=concurrency_limit,
+        concurrency_id=concurrency_id,
         _js=_js,
     )

@@ -412,6 +426,8 @@ def on(
         scroll_to_output=scroll_to_output,
         show_progress=show_progress,
         api_name=api_name,
+        concurrency_limit=concurrency_limit,
+        concurrency_id=concurrency_id,
         js=_js,
         queue=queue,
         batch=batch,
@@ -114,6 +114,7 @@ class Interface(Blocks):
         api_name: str | Literal[False] | None = "predict",
         _api_mode: bool = False,
         allow_duplication: bool = False,
+        concurrency_limit: int | None = 1,
         **kwargs,
     ):
         """
@@ -140,6 +141,7 @@ class Interface(Blocks):
             max_batch_size: Maximum number of inputs to batch together if this is called from the queue (only relevant if batch=True)
             api_name: Defines how the endpoint appears in the API docs. Can be a string, None, or False. If False or None, the endpoint will not be exposed in the api docs. If set to a string, the endpoint will be exposed in the api docs with the given name. Default value is "predict".
             allow_duplication: If True, then will show a 'Duplicate Spaces' button on Hugging Face Spaces.
+            concurrency_limit: If set, this is the maximum number of events that can be running simultaneously. Extra requests will be queued.
         """
         super().__init__(
             analytics_enabled=analytics_enabled,
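A minimal usage sketch of the new `Interface` parameter (the handler is illustrative):

```python
import gradio as gr

def echo(text):
    return text

# At most 4 requests to this Interface's prediction function run at once;
# further requests wait in the queue.
demo = gr.Interface(echo, "text", "text", concurrency_limit=4)
demo.launch()
```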
@@ -310,6 +312,7 @@ class Interface(Blocks):
         self.batch = batch
         self.max_batch_size = max_batch_size
         self.allow_duplication = allow_duplication
+        self.concurrency_limit = concurrency_limit

         self.share = None
         self.share_url = None
@@ -599,6 +602,7 @@ class Interface(Blocks):
             postprocess=not (self.api_mode),
             batch=self.batch,
             max_batch_size=self.max_batch_size,
+            concurrency_limit=self.concurrency_limit,
         )

         predict_event.then(
@@ -628,6 +632,7 @@ class Interface(Blocks):
             postprocess=not (self.api_mode),
             batch=self.batch,
             max_batch_size=self.max_batch_size,
+            concurrency_limit=self.concurrency_limit,
         )

     def attach_clear_events(
@@ -6,8 +6,8 @@ import json
 import time
 import traceback
 import uuid
-from collections import deque
+from queue import Queue as ThreadQueue
 from typing import TYPE_CHECKING

 import fastapi
 from typing_extensions import Literal
@@ -24,6 +24,9 @@ from gradio.exceptions import Error
 from gradio.helpers import TrackedIterable
 from gradio.utils import run_coro_in_background, safe_get_lock, set_task_name

+if TYPE_CHECKING:
+    from gradio.blocks import BlockFunction
+

 class Event:
     def __init__(
@@ -74,14 +77,14 @@ class Queue:
         concurrency_count: int,
         update_intervals: float,
         max_size: int | None,
         blocks_dependencies: list,
+        block_fns: list[BlockFunction],
     ):
-        self.event_queue: deque[Event] = deque()
+        self.event_queue: list[Event] = []
         self.awaiting_data_events: dict[str, Event] = {}
         self.stopped = False
         self.max_thread_count = concurrency_count
         self.update_intervals = update_intervals
-        self.active_jobs: list[None | list[Event]] = [None] * concurrency_count
+        self.active_jobs: list[None | list[Event]] = []
         self.delete_lock = safe_get_lock()
         self.server_app = None
         self.duration_history_total = 0
@@ -93,11 +96,26 @@ class Queue:
         self.sleep_when_free = 0.05
         self.progress_update_sleep_when_free = 0.1
         self.max_size = max_size
         self.blocks_dependencies = blocks_dependencies
+        self.block_fns = block_fns
         self.continuous_tasks: list[Event] = []
         self._asyncio_tasks: list[asyncio.Task] = []

+        self.concurrency_limit_per_concurrency_id = {}
+
     def start(self):
+        self.active_jobs = [None] * self.max_thread_count
+        for block_fn in self.block_fns:
+            if block_fn.concurrency_limit is not None:
+                self.concurrency_limit_per_concurrency_id[
+                    block_fn.concurrency_id
+                ] = min(
+                    self.concurrency_limit_per_concurrency_id.get(
+                        block_fn.concurrency_id, block_fn.concurrency_limit
+                    ),
+                    block_fn.concurrency_limit,
+                )
+        print(">>>", self.concurrency_limit_per_concurrency_id)
+
         run_coro_in_background(self.start_processing)
         run_coro_in_background(self.start_progress_updates)
         if not self.live_updates:
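To see what `start()` computes, here is a standalone sketch of the same min-merge over per-event limits (the ids and limit values are hypothetical, not from this commit):

```python
block_fns = [
    ("muldiv", 2),  # (concurrency_id, concurrency_limit)
    ("muldiv", 4),
    ("gpu", None),  # None = unlimited; it never constrains the group
]

limits = {}
for cid, limit in block_fns:
    if limit is not None:
        # Events sharing a concurrency_id are bound by the lowest
        # limit set on any listener in the group.
        limits[cid] = min(limits.get(cid, limit), limit)

print(limits)  # {'muldiv': 2}
```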
@@ -130,22 +148,46 @@ class Queue:
         return count

     def get_events_in_batch(self) -> tuple[list[Event] | None, bool]:
-        if not (self.event_queue):
+        if not self.event_queue:
             return None, False

-        first_event = self.event_queue.popleft()
-        events = [first_event]
+        worker_count_per_concurrency_id = {}
+        for job in self.active_jobs:
+            if job is not None:
+                for event in job:
+                    concurrency_id = self.block_fns[event.fn_index].concurrency_id
+                    worker_count_per_concurrency_id[concurrency_id] = (
+                        worker_count_per_concurrency_id.get(concurrency_id, 0) + 1
+                    )

-        event_fn_index = first_event.fn_index
-        batch = self.blocks_dependencies[event_fn_index]["batch"]
+        events = []
+        batch = False
+        for index, event in enumerate(self.event_queue):
+            block_fn = self.block_fns[event.fn_index]
+            concurrency_id = block_fn.concurrency_id
+            concurrency_limit = self.concurrency_limit_per_concurrency_id.get(
+                concurrency_id, None
+            )
+            existing_worker_count = worker_count_per_concurrency_id.get(
+                concurrency_id, 0
+            )
+            if concurrency_limit is None or existing_worker_count < concurrency_limit:
+                batch = block_fn.batch
+                if batch:
+                    remaining_worker_count = concurrency_limit - existing_worker_count
+                    batch_size = block_fn.max_batch_size
+                    rest_of_batch = [
+                        event
+                        for event in self.event_queue[index:]
+                        if event.fn_index == event.fn_index
+                    ][: min(batch_size - 1, remaining_worker_count)]
+                    events = [event] + rest_of_batch
+                else:
+                    events = [event]
+                break

-        if batch:
-            batch_size = self.blocks_dependencies[event_fn_index]["max_batch_size"]
-            rest_of_batch = [
-                event for event in self.event_queue if event.fn_index == event_fn_index
-            ][: batch_size - 1]
-            events.extend(rest_of_batch)
-            [self.event_queue.remove(event) for event in rest_of_batch]
+        for event in events:
+            self.event_queue.remove(event)

         return events, batch
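Reduced to its core, the new selection logic scans the queue for the first event whose concurrency group still has a free worker. A simplified sketch under that reading (field and function names are simplified, batching omitted):

```python
def pick_next(event_queue, active_counts, limits):
    """Return the first runnable event, or None if every group is saturated."""
    for index, event in enumerate(event_queue):
        limit = limits.get(event.concurrency_id)  # None means unlimited
        if limit is None or active_counts.get(event.concurrency_id, 0) < limit:
            return event_queue.pop(index)
    return None
```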
@@ -180,6 +222,8 @@ class Queue:
                     self.broadcast_estimations
                 )
+                self._asyncio_tasks.append(broadcast_live_estimations_task)
             else:
                 await asyncio.sleep(self.sleep_when_free)

     async def start_progress_updates(self) -> None:
         """
@@ -96,7 +96,7 @@ class BaseReloader(ABC):
         # Copy over the blocks to get new components and events but
         # not a new queue
         if self.running_app.blocks._queue:
-            self.running_app.blocks._queue.blocks_dependencies = demo.dependencies
+            self.running_app.blocks._queue.block_fns = demo.fns
             demo._queue = self.running_app.blocks._queue
         self.running_app.blocks = demo

@@ -826,23 +826,6 @@ def check_function_inputs_match(fn: Callable, inputs: list, inputs_as_dict: bool
         )


-def concurrency_count_warning(queue: Callable[P, T]) -> Callable[P, T]:
-    @functools.wraps(queue)
-    def wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
-        _self, *positional = args
-        if is_zero_gpu_space() and (
-            len(positional) >= 1 or "concurrency_count" in kwargs
-        ):
-            warnings.warn(
-                "Queue concurrency_count on ZeroGPU Spaces cannot be overridden "
-                "and is always equal to Block's max_threads. "
-                "Consider setting max_threads value on the Block instead"
-            )
-        return queue(*args, **kwargs)
-
-    return wrapper
-
-
 class TupleNoPrint(tuple):
     # To remove printing function return in notebook
     def __repr__(self):
@@ -174,9 +174,13 @@ demo.launch()
 You can control the number of requests processed at a single time as such:

 ```python
-demo.queue(concurrency_count=3)
+with gr.Blocks() as demo:
+    btn = gr.Button("Run")
+    btn.click(..., concurrency_limit=2)
 ```

+This limits the number of requests processed for this event listener at a single time to 2.
+
 See the [Docs on queueing](/docs/#queue) on configuring other queuing parameters.

 To specify only certain functions for queueing in Blocks:
@@ -36,15 +36,15 @@ Note: If you host your Gradio app on [Hugging Face Spaces](https://hf.space), th

 There are several parameters that can be used to configure the queue and help reduce latency. Let's go through them one-by-one.

-### The `concurrency_count` parameter
+### The `concurrency_limit` parameter

-The first parameter we will explore is the `concurrency_count` parameter of `queue()`. This parameter is used to set the number of worker threads in the Gradio server that will be processing your requests in parallel. By default, this parameter is set to `1` but increasing this can **linearly multiply the capacity of your server to handle requests**.
+The first parameter we will explore is the `concurrency_limit` parameter of any event listener, e.g. `btn.click(..., concurrency_limit=20)` or `gr.Interface(..., concurrency_limit=20)`. This parameter sets the maximum number of worker threads in the Gradio server that can be processing your requests at once. By default, this parameter is set to `1`, but increasing it can **linearly multiply the capacity of your server to handle requests**.

-So why not set this parameter much higher? Keep in mind that since requests are processed in parallel, each request will consume memory to store the data and weights for processing. This means that you might get out-of-memory errors if you increase the `concurrency_count` too high. You may also start to get diminishing returns if the `concurrency_count` is too high because of costs of switching between different worker threads.
+So why not set this parameter much higher? Keep in mind that since requests are processed in parallel, each request will consume memory to store the data and weights for processing. This means that you might get out-of-memory errors if you increase the `concurrency_limit` too high. You may also start to get diminishing returns if the `concurrency_limit` is too high because of the cost of switching between different worker threads.

-**Recommendation**: Increase the `concurrency_count` parameter as high as you can while you continue to see performance gains or until you hit memory limits on your machine. You can [read about Hugging Face Spaces machine specs here](https://huggingface.co/docs/hub/spaces-overview).
+**Recommendation**: Increase the `concurrency_limit` parameter as high as you can while you continue to see performance gains or until you hit memory limits on your machine. You can [read about Hugging Face Spaces machine specs here](https://huggingface.co/docs/hub/spaces-overview).

-_Note_: there is a second parameter which controls the _total_ number of threads that Gradio can generate, whether or not queuing is enabled. This is the `max_threads` parameter in the `launch()` method. When you increase the `concurrency_count` parameter in `queue()`, this is automatically increased as well. However, in some cases, you may want to manually increase this, e.g. if queuing is not enabled.
+_Note_: there is a second parameter which controls the _total_ number of threads that Gradio can generate, across all your events. This is the `max_threads` parameter in the `launch()` method. You may want to manually increase this.

 ### The `max_size` parameter
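To make the `_Note_` above concrete, a small sketch of raising the total thread budget at launch (the handler and the limit values are illustrative):

```python
import gradio as gr

def predict(text):
    return text.upper()

with gr.Blocks() as demo:
    inp, out = gr.Textbox(), gr.Textbox()
    gr.Button("Run").click(predict, inp, out, concurrency_limit=20)

# max_threads caps the total worker threads across all events; raise it
# if the sum of your per-event limits exceeds the default of 40.
demo.launch(max_threads=80)
```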
@@ -87,7 +87,7 @@ def trim_words(words, lengths):

 The second function can be used with `batch=True` and an appropriate `max_batch_size` parameter.

-**Recommendation**: If possible, write your function to accept batches of samples, and then set `batch` to `True` and the `max_batch_size` as high as possible based on your machine's memory limits. If you set `max_batch_size` as high as possible, you will most likely need to set `concurrency_count` back to `1` since you will no longer have the memory to have multiple workers running in parallel.
+**Recommendation**: If possible, write your function to accept batches of samples, and then set `batch` to `True` and the `max_batch_size` as high as possible based on your machine's memory limits.

 ### The `api_open` parameter
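For reference, a minimal batched handler of the kind this guide section describes; the guide's own `trim_words` definitions are elided from this diff, so this is a sketch under that assumption:

```python
import gradio as gr

# A batched handler: receives lists of inputs, returns a list of output lists.
def trim_words(words: list[str], lengths: list[float]) -> list[list[str]]:
    trimmed = [word[: int(length)] for word, length in zip(words, lengths)]
    return [trimmed]  # one list per output component

demo = gr.Interface(
    trim_words,
    ["textbox", "number"],
    ["textbox"],
    batch=True,
    max_batch_size=16,  # up to 16 queued requests are processed in one call
)
demo.launch()
```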
@@ -106,7 +106,7 @@ It is particularly straightforward to upgrade your Hardware on Hugging Face Spac

 While you might need to adapt portions of your machine learning inference code to run on a GPU (here's a [handy guide](https://cnvrg.io/pytorch-cuda/) if you are using PyTorch), Gradio is completely agnostic to the choice of hardware and will work completely fine if you use it with CPUs, GPUs, TPUs, or any other hardware!

 Note: your GPU memory is different than your CPU memory, so if you upgrade your hardware,
-you might need to adjust the value of the `concurrency_count` parameter described above.
+you might need to adjust the value of the `concurrency_limit` parameter described above.

 ## Conclusion
@@ -56,7 +56,7 @@ with gr.Blocks() as demo:
         output_video = gr.Video()
         submit_video = gr.Button()
         submit_video.click(identity_with_sleep, input_video, output_video, api_name="video")
-    demo.queue(max_size=50, concurrency_count=20).launch(prevent_thread_lock=True, quiet=True)
+    demo.queue(max_size=50).launch(prevent_thread_lock=True, quiet=True)


 FN_INDEX_TO_DATA = {
@@ -217,14 +217,18 @@ class TestBlocksMethods:
                     num1 = gr.Number(value=4, precision=0)
                     o1 = gr.Number()
                     async_iterate = gr.Button(value="Async Iteration")
-                    async_iterate.click(async_iteration, num1, o1)
+                    async_iterate.click(
+                        async_iteration,
+                        num1,
+                        o1,
+                        concurrency_limit=2,
+                        concurrency_id="main",
+                    )
                 with gr.Column():
                     num2 = gr.Number(value=4, precision=0)
                     o2 = gr.Number()
                     iterate = gr.Button(value="Iterate")
-                    iterate.click(iteration, num2, o2)
-
-            demo.queue(concurrency_count=2)
+                    iterate.click(iteration, num2, o2, concurrency_id="main")

         with connect(demo) as client:
             job_1 = client.submit(3, fn_index=0)
@@ -339,15 +343,6 @@ class TestBlocksMethods:
         demo.launch(prevent_thread_lock=True)
         assert len(demo.get_config_file()["dependencies"]) == 1

-    def test_concurrency_count_zero_gpu(self, monkeypatch):
-        monkeypatch.setenv("SPACES_ZERO_GPU", "true")
-        demo = gr.Blocks()
-        with pytest.warns():
-            demo.queue(concurrency_count=42)
-        with pytest.warns():
-            demo.queue(42)
-        assert demo._queue.max_thread_count == demo.max_threads
-

 class TestTempFile:
     def test_pil_images_hashed(self, connect, gradio_temp_dir):
@@ -934,8 +934,13 @@ async def test_info_isolation(async_handler: bool):
         gr.Info(f"Hello {name}")
         return name

-    demo = gr.Interface(greet_async if async_handler else greet_sync, "text", "text")
-    demo.queue(concurrency_count=2).launch(prevent_thread_lock=True)
+    demo = gr.Interface(
+        greet_async if async_handler else greet_sync,
+        "text",
+        "text",
+        concurrency_limit=2,
+    )
+    demo.launch(prevent_thread_lock=True)

     async def session_interaction(name, delay=0):
         await asyncio.sleep(delay)
@@ -24,43 +24,6 @@ class TestQueueing:

         assert job.result() == "Hello, x!"

-    def test_multiple_requests(self):
-        with gr.Blocks() as demo:
-            name = gr.Textbox()
-            output = gr.Textbox()
-
-            def greet(x):
-                time.sleep(2)
-                return f"Hello, {x}!"
-
-            name.submit(greet, name, output)
-
-        app, _, _ = demo.queue(concurrency_count=2).launch(prevent_thread_lock=True)
-        test_client = TestClient(app)
-
-        client = grc.Client(f"http://localhost:{demo.server_port}")
-        client.submit("a", fn_index=0)
-        job2 = client.submit("b", fn_index=0)
-        client.submit("c", fn_index=0)
-        job4 = client.submit("d", fn_index=0)
-
-        sizes = []
-        while job4.status().code.value != "FINISHED":
-            queue_status = test_client.get("/queue/status").json()
-            sizes.append(queue_status["queue_size"])
-            time.sleep(0.05)
-
-        assert max(sizes) in [
-            2,
-            3,
-            4,
-        ]  # Can be 2 - 4, depending on if the workers have picked up jobs before the queue status is checked
-        assert min(sizes) == 0
-        assert sizes[-1] == 0
-
-        assert job2.result() == "Hello, b!"
-        assert job4.result() == "Hello, d!"
-
     def test_all_status_messages(self):
         with gr.Blocks() as demo:
             name = gr.Textbox()
@@ -70,9 +33,9 @@ class TestQueueing:
                 time.sleep(2)
                 return f"Hello, {x}!"

-            name.submit(greet, name, output)
+            name.submit(greet, name, output, concurrency_limit=2)

-        app, _, _ = demo.queue(concurrency_count=2).launch(prevent_thread_lock=True)
+        app, _, _ = demo.launch(prevent_thread_lock=True)
         test_client = TestClient(app)

         client = grc.Client(f"http://localhost:{demo.server_port}")
|
||||
3,
|
||||
4,
|
||||
] # Can be 2 - 4, depending on if the workers have picked up jobs before the queue status is checked
|
||||
print(sizes)
|
||||
|
||||
assert min(sizes) == 0
|
||||
assert sizes[-1] == 0
|
||||
|
||||
assert job2.result() == "Hello, b!"
|
||||
assert job4.result() == "Hello, d!"
|
||||
|
||||
def test_concurrency_limits(self):
|
||||
with gr.Blocks() as demo:
|
||||
a = gr.Number()
|
||||
b = gr.Number()
|
||||
output = gr.Number()
|
||||
|
||||
add_btn = gr.Button("Add")
|
||||
|
||||
@add_btn.click(inputs=[a, b], outputs=output, concurrency_limit=2)
|
||||
def add(x, y):
|
||||
time.sleep(4)
|
||||
return x + y
|
||||
|
||||
sub_btn = gr.Button("Subtract")
|
||||
|
||||
@sub_btn.click(inputs=[a, b], outputs=output, concurrency_limit=None)
|
||||
def sub(x, y):
|
||||
time.sleep(4)
|
||||
return x - y
|
||||
|
||||
mul_btn = gr.Button("Multiply")
|
||||
|
||||
@mul_btn.click(
|
||||
inputs=[a, b],
|
||||
outputs=output,
|
||||
concurrency_limit=2,
|
||||
concurrency_id="muldiv",
|
||||
)
|
||||
def mul(x, y):
|
||||
time.sleep(4)
|
||||
return x * y
|
||||
|
||||
div_btn = gr.Button("Divide")
|
||||
|
||||
@div_btn.click(
|
||||
inputs=[a, b],
|
||||
outputs=output,
|
||||
concurrency_limit=2,
|
||||
concurrency_id="muldiv",
|
||||
)
|
||||
def div(x, y):
|
||||
time.sleep(4)
|
||||
return x / y
|
||||
|
||||
app, _, _ = demo.launch(prevent_thread_lock=True)
|
||||
|
||||
client = grc.Client(f"http://localhost:{demo.server_port}")
|
||||
add_job_1 = client.submit(1, 1, fn_index=0)
|
||||
add_job_2 = client.submit(1, 1, fn_index=0)
|
||||
add_job_3 = client.submit(1, 1, fn_index=0)
|
||||
sub_job_1 = client.submit(1, 1, fn_index=1)
|
||||
sub_job_2 = client.submit(1, 1, fn_index=1)
|
||||
sub_job_3 = client.submit(1, 1, fn_index=1)
|
||||
sub_job_3 = client.submit(1, 1, fn_index=1)
|
||||
mul_job_1 = client.submit(1, 1, fn_index=2)
|
||||
div_job_1 = client.submit(1, 1, fn_index=3)
|
||||
mul_job_2 = client.submit(1, 1, fn_index=2)
|
||||
|
||||
time.sleep(2)
|
||||
|
||||
add_job_statuses = [add_job_1.status(), add_job_2.status(), add_job_3.status()]
|
||||
assert sorted([s.code.value for s in add_job_statuses]) == [
|
||||
"IN_QUEUE",
|
||||
"PROCESSING",
|
||||
"PROCESSING",
|
||||
]
|
||||
|
||||
sub_job_statuses = [sub_job_1.status(), sub_job_2.status(), sub_job_3.status()]
|
||||
assert [s.code.value for s in sub_job_statuses] == [
|
||||
"PROCESSING",
|
||||
"PROCESSING",
|
||||
"PROCESSING",
|
||||
]
|
||||
|
||||
muldiv_job_statuses = [
|
||||
mul_job_1.status(),
|
||||
div_job_1.status(),
|
||||
mul_job_2.status(),
|
||||
]
|
||||
assert sorted([s.code.value for s in muldiv_job_statuses]) == [
|
||||
"IN_QUEUE",
|
||||
"PROCESSING",
|
||||
"PROCESSING",
|
||||
]
|
||||
|
||||
def test_every_does_not_block_queue(self):
|
||||
with gr.Blocks() as demo:
|
||||
num = gr.Number(value=0)
|
||||
|