mirror of
https://github.com/gradio-app/gradio.git
synced 2025-03-31 12:20:26 +08:00
Allow client to be duplicated from an existing Space (#3809)
* client * readme * readme * fixes * cleanup * error messages * format * updates * format * build * fixes * fix tests * Update client/python/gradio_client/client.py Co-authored-by: aliabid94 <aabid94@gmail.com> * set timeout * rename * format * rename * sleep timeout * format * mock secrets * format * format * docstring * fix test --------- Co-authored-by: aliabid94 <aabid94@gmail.com>
This commit is contained in:
parent
0b725fbac1
commit
52d0885937
@ -53,6 +53,26 @@ from gradio_client import Client
|
||||
client = Client("abidlabs/my-private-space", hf_token="...")
|
||||
```
|
||||
|
||||
**Duplicating a Space for private use**
|
||||
|
||||
While you can use any public Space as an API, you may get rate limited by Hugging Face if you make too many requests. For unlimited usage of a Space, simply duplicate the Space to create a private Space,
|
||||
and then use it to make as many requests as you'd like!
|
||||
|
||||
The `gradio_client` includes a class method: `Client.duplicate()` to make this process simple:
|
||||
|
||||
```python
|
||||
from gradio_client import Client
|
||||
|
||||
client = Client.duplicate("abidlabs/whisper")
|
||||
client.predict("audio_sample.wav")
|
||||
|
||||
>> "This is a test of the whisper speech recognition model."
|
||||
```
|
||||
|
||||
If you have previously duplicated a Space, re-running `duplicate()` will *not* create a new Space. Instead, the Client will attach to the previously-created Space. So it is safe to re-run the `Client.duplicate()` method multiple times.
|
||||
|
||||
**Note:** if the original Space uses GPUs, your private Space will as well, and your Hugging Face account will get billed based on the price of the GPU. To minimize charges, your Space will automatically go to sleep after 1 hour of inactivity. You can also set the hardware using the `hardware` parameter of `duplicate()`.
|
||||
|
||||
|
||||
**Connecting a general Gradio app**
|
||||
|
||||
@ -64,9 +84,10 @@ from gradio_client import Client
|
||||
client = Client("https://bec81a83-5b5c-471e.gradio.live")
|
||||
```
|
||||
|
||||
|
||||
### Inspecting the API endpoints
|
||||
|
||||
Once you have connected to a Gradio app, you can view the APIs that are available to you by calling the `Client.view_api()` method. For the Whisper Space, we see the following:
|
||||
Once you have connected to a Gradio app, you can view the APIs that are available to you by calling the `.view_api()` method. For the Whisper Space, we see the following:
|
||||
|
||||
```
|
||||
Client.predict() Usage Info
|
||||
|
@ -17,7 +17,11 @@ from typing import Any, Callable, Dict, List, Tuple
|
||||
import huggingface_hub
|
||||
import requests
|
||||
import websockets
|
||||
from huggingface_hub.utils import build_hf_headers, send_telemetry
|
||||
from huggingface_hub.utils import (
|
||||
RepositoryNotFoundError,
|
||||
build_hf_headers,
|
||||
send_telemetry,
|
||||
)
|
||||
from packaging import version
|
||||
from typing_extensions import Literal
|
||||
|
||||
@ -58,7 +62,7 @@ class Client:
|
||||
"""
|
||||
Parameters:
|
||||
src: Either the name of the Hugging Face Space to load, (e.g. "abidlabs/whisper-large-v2") or the full URL (including "http" or "https") of the hosted Gradio app to load (e.g. "http://mydomain.com/app" or "https://bec81a83-5b5c-471e.gradio.live/").
|
||||
hf_token: The Hugging Face token to use to access private Spaces. Automatically fetched if you are logged in via the Hugging Face Hub CLI.
|
||||
hf_token: The Hugging Face token to use to access private Spaces. Automatically fetched if you are logged in via the Hugging Face Hub CLI. Obtain from: https://huggingface.co/settings/token
|
||||
max_workers: The maximum number of thread workers that can be used to make requests to the remote Gradio app simultaneously.
|
||||
verbose: Whether the client should print statements to the console.
|
||||
"""
|
||||
@ -81,14 +85,20 @@ class Client:
|
||||
)
|
||||
self.space_id = src
|
||||
self.src = _src
|
||||
if self.verbose:
|
||||
print(f"Loaded as API: {self.src} ✔")
|
||||
state = self._get_space_state()
|
||||
if state == utils.BUILDING_RUNTIME:
|
||||
if self.verbose:
|
||||
print("Space is still building. Please wait...")
|
||||
while self._get_space_state() == utils.BUILDING_RUNTIME:
|
||||
time.sleep(2) # so we don't get rate limited by the API
|
||||
pass
|
||||
if state in utils.INVALID_RUNTIME:
|
||||
raise ValueError(
|
||||
f"The current space is in the invalid state: {state}. "
|
||||
"Please contact the owner to fix this."
|
||||
)
|
||||
if self.verbose:
|
||||
print(f"Loaded as API: {self.src} ✔")
|
||||
|
||||
self.api_url = urllib.parse.urljoin(self.src, utils.API_URL)
|
||||
self.ws_url = urllib.parse.urljoin(
|
||||
@ -110,11 +120,101 @@ class Client:
|
||||
# Disable telemetry by setting the env variable HF_HUB_DISABLE_TELEMETRY=1
|
||||
threading.Thread(target=self._telemetry_thread).start()
|
||||
|
||||
@classmethod
|
||||
def duplicate(
|
||||
cls,
|
||||
from_id: str,
|
||||
to_id: str | None = None,
|
||||
hf_token: str | None = None,
|
||||
private: bool = True,
|
||||
hardware: str | None = None,
|
||||
secrets: Dict[str, str] | None = None,
|
||||
sleep_timeout: int = 5,
|
||||
max_workers: int = 40,
|
||||
verbose: bool = True,
|
||||
):
|
||||
"""
|
||||
Duplicates a Hugging Face Space under your account and returns a Client object
|
||||
for the new Space. No duplication is created if the Space already exists in your
|
||||
account (to override this, provide a new name for the new Space using `to_id`).
|
||||
To use this method, you must provide an `hf_token` or be logged in via the Hugging
|
||||
Face Hub CLI.
|
||||
|
||||
The new Space will be private by default and use the same hardware as the original
|
||||
Space. This can be changed by using the `private` and `hardware` parameters. For
|
||||
hardware upgrades (beyond the basic CPU tier), you may be required to provide
|
||||
billing information on Hugging Face: https://huggingface.co/settings/billing
|
||||
|
||||
Parameters:
|
||||
from_id: The name of the Hugging Face Space to duplicate in the format "{username}/{space_id}", e.g. "gradio/whisper".
|
||||
to_id: The name of the new Hugging Face Space to create, e.g. "abidlabs/whisper-duplicate". If not provided, the new Space will be named "{your_HF_username}/{space_id}".
|
||||
hf_token: The Hugging Face token to use to access private Spaces. Automatically fetched if you are logged in via the Hugging Face Hub CLI. Obtain from: https://huggingface.co/settings/token
|
||||
private: Whether the new Space should be private (True) or public (False). Defaults to True.
|
||||
hardware: The hardware tier to use for the new Space. Defaults to the same hardware tier as the original Space. Options include "cpu-basic", "cpu-upgrade", "t4-small", "t4-medium", "a10g-small", "a10g-large", "a100-large", subject to availability.
|
||||
secrets: A dictionary of (secret key, secret value) to pass to the new Space. Defaults to None.
|
||||
sleep_timeout: The number of minutes after which the duplicate Space will be paused if no requests are made to it (to minimize billing charges). Defaults to 5 minutes.
|
||||
max_workers: The maximum number of thread workers that can be used to make requests to the remote Gradio app simultaneously.
|
||||
verbose: Whether the client should print statements to the console.
|
||||
"""
|
||||
try:
|
||||
info = huggingface_hub.get_space_runtime(from_id, token=hf_token)
|
||||
except RepositoryNotFoundError:
|
||||
raise ValueError(
|
||||
f"Could not find Space: {from_id}. If it is a private Space, please provide an `hf_token`."
|
||||
)
|
||||
if to_id:
|
||||
if "/" in to_id:
|
||||
to_id = to_id.split("/")[1]
|
||||
space_id = huggingface_hub.get_full_repo_name(to_id, token=hf_token)
|
||||
else:
|
||||
space_id = huggingface_hub.get_full_repo_name(
|
||||
from_id.split("/")[1], token=hf_token
|
||||
)
|
||||
try:
|
||||
huggingface_hub.get_space_runtime(space_id, token=hf_token)
|
||||
if verbose:
|
||||
print(
|
||||
f"Using your existing Space: {utils.SPACE_URL.format(space_id)} 🤗"
|
||||
)
|
||||
except RepositoryNotFoundError:
|
||||
if verbose:
|
||||
print(f"Creating a duplicate of {from_id} for your own use... 🤗")
|
||||
huggingface_hub.duplicate_space(
|
||||
from_id=from_id,
|
||||
to_id=space_id,
|
||||
token=hf_token,
|
||||
exist_ok=True,
|
||||
private=private,
|
||||
)
|
||||
utils.set_space_timeout(
|
||||
space_id, hf_token=hf_token, timeout_in_seconds=sleep_timeout * 60
|
||||
)
|
||||
if verbose:
|
||||
print(f"Created new Space: {utils.SPACE_URL.format(space_id)}")
|
||||
current_info = huggingface_hub.get_space_runtime(space_id, token=hf_token)
|
||||
current_hardware = current_info.hardware or "cpu-basic"
|
||||
if hardware is None:
|
||||
hardware = info.hardware
|
||||
if not current_hardware == hardware:
|
||||
huggingface_hub.request_space_hardware(space_id, hardware) # type: ignore
|
||||
print(
|
||||
f"-------\nNOTE: this Space uses upgraded hardware: {hardware}... see billing info at https://huggingface.co/settings/billing\n-------"
|
||||
)
|
||||
if secrets is not None:
|
||||
for key, value in secrets.items():
|
||||
huggingface_hub.add_space_secret(space_id, key, value, token=hf_token)
|
||||
if verbose:
|
||||
print("")
|
||||
client = cls(
|
||||
space_id, hf_token=hf_token, max_workers=max_workers, verbose=verbose
|
||||
)
|
||||
return client
|
||||
|
||||
def _get_space_state(self):
|
||||
if not self.space_id:
|
||||
return None
|
||||
api = huggingface_hub.HfApi(token=self.hf_token)
|
||||
return api.get_space_runtime(self.space_id).stage
|
||||
info = huggingface_hub.get_space_runtime(self.space_id, token=self.hf_token)
|
||||
return info.stage
|
||||
|
||||
def predict(
|
||||
self,
|
||||
@ -520,22 +620,29 @@ class Endpoint:
|
||||
|
||||
if self.use_ws:
|
||||
result = utils.synchronize_async(self._ws_fn, data, hash_data, helper)
|
||||
output = result["data"]
|
||||
if "error" in result:
|
||||
raise ValueError(result["error"])
|
||||
else:
|
||||
response = requests.post(
|
||||
self.client.api_url, headers=self.client.headers, data=data
|
||||
)
|
||||
result = json.loads(response.content.decode("utf-8"))
|
||||
try:
|
||||
output = result["data"]
|
||||
except KeyError:
|
||||
if "error" in result and "429" in result["error"]:
|
||||
raise utils.TooManyRequestsError(
|
||||
"Too many requests to the Hugging Face API"
|
||||
)
|
||||
raise KeyError(
|
||||
f"Could not find 'data' key in response. Response received: {result}"
|
||||
try:
|
||||
output = result["data"]
|
||||
except KeyError:
|
||||
is_public_space = (
|
||||
self.client.space_id
|
||||
and not huggingface_hub.space_info(self.client.space_id).private
|
||||
)
|
||||
if "error" in result and "429" in result["error"] and is_public_space:
|
||||
raise utils.TooManyRequestsError(
|
||||
f"Too many requests to the API, please try again later. To avoid being rate-limited, please duplicate the Space using Client.duplicate({self.client.space_id}) and pass in your Hugging Face token."
|
||||
)
|
||||
elif "error" in result:
|
||||
raise ValueError(result["error"])
|
||||
raise KeyError(
|
||||
f"Could not find 'data' key in response. Response received: {result}"
|
||||
)
|
||||
return tuple(output)
|
||||
|
||||
return _predict
|
||||
@ -893,7 +1000,7 @@ class Job(Future):
|
||||
if self.verbose and self.space_id and eta and eta > 30:
|
||||
print(
|
||||
f"Due to heavy traffic on this app, the prediction will take approximately {int(eta)} seconds."
|
||||
f"For faster predictions without waiting in queue, you may duplicate the space: {utils.DUPLICATE_URL.format(self.space_id)}"
|
||||
f"For faster predictions without waiting in queue, you may duplicate the space using: Client.duplicate({self.space_id})"
|
||||
)
|
||||
return self.communicator.job.latest_status
|
||||
|
||||
|
@ -18,6 +18,7 @@ from typing import Any, Callable, Dict, List, Tuple
|
||||
|
||||
import fsspec.asyn
|
||||
import httpx
|
||||
import huggingface_hub
|
||||
import requests
|
||||
from websockets.legacy.protocol import WebSocketCommonProtocol
|
||||
|
||||
@ -25,16 +26,16 @@ API_URL = "/api/predict/"
|
||||
WS_URL = "/queue/join"
|
||||
UPLOAD_URL = "/upload"
|
||||
RESET_URL = "/reset"
|
||||
DUPLICATE_URL = "https://huggingface.co/spaces/{}?duplicate=true"
|
||||
SPACE_URL = "https://hf.space/{}"
|
||||
STATE_COMPONENT = "state"
|
||||
INVALID_RUNTIME = [
|
||||
"NO_APP_FILE",
|
||||
"CONFIG_ERROR",
|
||||
"BUILDING",
|
||||
"BUILD_ERROR",
|
||||
"RUNTIME_ERROR",
|
||||
"PAUSED",
|
||||
]
|
||||
BUILDING_RUNTIME = "BUILDING"
|
||||
|
||||
__version__ = (pkgutil.get_data(__name__, "version.txt") or b"").decode("ascii").strip()
|
||||
|
||||
@ -386,6 +387,26 @@ def file_to_json(file_path: str | Path) -> Dict:
|
||||
return json.load(f)
|
||||
|
||||
|
||||
###########################
|
||||
# HuggingFace Hub API Utils
|
||||
###########################
|
||||
def set_space_timeout(
|
||||
space_id: str,
|
||||
hf_token: str | None = None,
|
||||
timeout_in_seconds: int = 300,
|
||||
):
|
||||
headers = huggingface_hub.utils.build_hf_headers(
|
||||
token=hf_token,
|
||||
library_name="gradio_client",
|
||||
library_version=__version__,
|
||||
)
|
||||
requests.post(
|
||||
f"https://huggingface.co/api/spaces/{space_id}/sleeptime",
|
||||
json={"seconds": timeout_in_seconds},
|
||||
headers=headers,
|
||||
)
|
||||
|
||||
|
||||
########################
|
||||
# Misc utils
|
||||
########################
|
||||
|
@ -100,7 +100,7 @@ class TestPredictionsFromSpaces:
|
||||
self,
|
||||
):
|
||||
client = Client(src="gradio/count_generator")
|
||||
job = client.submit(3, api_name="/count")
|
||||
job = client.submit(3, fn_index=0)
|
||||
|
||||
while not job.done():
|
||||
time.sleep(0.1)
|
||||
@ -108,7 +108,7 @@ class TestPredictionsFromSpaces:
|
||||
assert job.outputs() == [str(i) for i in range(3)]
|
||||
|
||||
outputs = []
|
||||
for o in client.submit(3, api_name="/count"):
|
||||
for o in client.submit(3, fn_index=0):
|
||||
outputs.append(o)
|
||||
assert outputs == [str(i) for i in range(3)]
|
||||
|
||||
@ -122,8 +122,8 @@ class TestPredictionsFromSpaces:
|
||||
@pytest.mark.flaky
|
||||
def test_timeout(self):
|
||||
with pytest.raises(TimeoutError):
|
||||
client = Client(src="gradio/count_generator")
|
||||
job = client.submit(api_name="/sleep")
|
||||
client = Client(src="gradio-tests/sleep")
|
||||
job = client.submit("ping", api_name="/predict")
|
||||
job.result(timeout=0.05)
|
||||
|
||||
@pytest.mark.flaky
|
||||
@ -543,3 +543,56 @@ class TestEndpoints:
|
||||
["file5", "file6"],
|
||||
"file7",
|
||||
]
|
||||
|
||||
|
||||
class TestDuplication:
|
||||
@pytest.mark.flaky
|
||||
@patch("huggingface_hub.get_space_runtime", return_value=MagicMock(hardware="cpu"))
|
||||
@patch("gradio_client.client.Client.__init__", return_value=None)
|
||||
def test_new_space_id(self, mock_init, mock_runtime):
|
||||
Client.duplicate("gradio/calculator", "test", hf_token=HF_TOKEN)
|
||||
mock_runtime.assert_any_call("gradio/calculator", token=HF_TOKEN)
|
||||
mock_runtime.assert_any_call("gradio-tests/test", token=HF_TOKEN)
|
||||
mock_init.assert_called_with(
|
||||
"gradio-tests/test", hf_token=HF_TOKEN, max_workers=40, verbose=True
|
||||
)
|
||||
Client.duplicate("gradio/calculator", "gradio-tests/test", hf_token=HF_TOKEN)
|
||||
mock_runtime.assert_any_call("gradio/calculator", token=HF_TOKEN)
|
||||
mock_runtime.assert_any_call("gradio-tests/test", token=HF_TOKEN)
|
||||
mock_init.assert_called_with(
|
||||
"gradio-tests/test", hf_token=HF_TOKEN, max_workers=40, verbose=True
|
||||
)
|
||||
|
||||
@pytest.mark.flaky
|
||||
@patch("huggingface_hub.get_space_runtime", return_value=MagicMock(hardware="cpu"))
|
||||
@patch("gradio_client.client.Client.__init__", return_value=None)
|
||||
def test_default_space_id(self, mock_init, mock_runtime):
|
||||
Client.duplicate("gradio/calculator", hf_token=HF_TOKEN)
|
||||
mock_runtime.assert_any_call("gradio/calculator", token=HF_TOKEN)
|
||||
mock_runtime.assert_any_call("gradio-tests/calculator", token=HF_TOKEN)
|
||||
mock_init.assert_called_with(
|
||||
"gradio-tests/calculator", hf_token=HF_TOKEN, max_workers=40, verbose=True
|
||||
)
|
||||
|
||||
@pytest.mark.flaky
|
||||
@patch("huggingface_hub.get_space_runtime", return_value=MagicMock(hardware="cpu"))
|
||||
@patch("huggingface_hub.add_space_secret")
|
||||
@patch("gradio_client.client.Client.__init__", return_value=None)
|
||||
def test_add_secrets(self, mock_init, mock_add_secret, mock_runtime):
|
||||
Client.duplicate(
|
||||
"gradio/calculator",
|
||||
hf_token=HF_TOKEN,
|
||||
secrets={"test_key": "test_value", "test_key2": "test_value2"},
|
||||
)
|
||||
mock_add_secret.assert_any_call(
|
||||
"gradio-tests/calculator",
|
||||
"test_key",
|
||||
"test_value",
|
||||
token=HF_TOKEN,
|
||||
)
|
||||
mock_add_secret.assert_any_call(
|
||||
"gradio-tests/calculator",
|
||||
"test_key2",
|
||||
"test_value2",
|
||||
token=HF_TOKEN,
|
||||
)
|
||||
|
Loading…
x
Reference in New Issue
Block a user