Allow client to be duplicated from an existing Space (#3809)

* client

* readme

* readme

* fixes

* cleanup

* error messages

* format

* updates

* format

* build

* fixes

* fix tests

* Update client/python/gradio_client/client.py

Co-authored-by: aliabid94 <aabid94@gmail.com>

* set timeout

* rename

* format

* rename

* sleep timeout

* format

* mock secrets

* format

* format

* docstring

* fix test

---------

Co-authored-by: aliabid94 <aabid94@gmail.com>
This commit is contained in:
Abubakar Abid 2023-04-12 13:26:37 -07:00 committed by GitHub
parent 0b725fbac1
commit 52d0885937
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 226 additions and 24 deletions

View File

@ -53,6 +53,26 @@ from gradio_client import Client
client = Client("abidlabs/my-private-space", hf_token="...")
```
**Duplicating a Space for private use**
While you can use any public Space as an API, you may get rate limited by Hugging Face if you make too many requests. For unlimited usage of a Space, simply duplicate the Space to create a private Space,
and then use it to make as many requests as you'd like!
The `gradio_client` includes a class method: `Client.duplicate()` to make this process simple:
```python
from gradio_client import Client
client = Client.duplicate("abidlabs/whisper")
client.predict("audio_sample.wav")
>> "This is a test of the whisper speech recognition model."
```
If you have previously duplicated a Space, re-running `duplicate()` will *not* create a new Space. Instead, the Client will attach to the previously-created Space. So it is safe to re-run the `Client.duplicate()` method multiple times.
**Note:** if the original Space uses GPUs, your private Space will as well, and your Hugging Face account will get billed based on the price of the GPU. To minimize charges, your Space will automatically go to sleep after 1 hour of inactivity. You can also set the hardware using the `hardware` parameter of `duplicate()`.
**Connecting a general Gradio app**
@ -64,9 +84,10 @@ from gradio_client import Client
client = Client("https://bec81a83-5b5c-471e.gradio.live")
```
### Inspecting the API endpoints
Once you have connected to a Gradio app, you can view the APIs that are available to you by calling the `Client.view_api()` method. For the Whisper Space, we see the following:
Once you have connected to a Gradio app, you can view the APIs that are available to you by calling the `.view_api()` method. For the Whisper Space, we see the following:
```
Client.predict() Usage Info

View File

@ -17,7 +17,11 @@ from typing import Any, Callable, Dict, List, Tuple
import huggingface_hub
import requests
import websockets
from huggingface_hub.utils import build_hf_headers, send_telemetry
from huggingface_hub.utils import (
RepositoryNotFoundError,
build_hf_headers,
send_telemetry,
)
from packaging import version
from typing_extensions import Literal
@ -58,7 +62,7 @@ class Client:
"""
Parameters:
src: Either the name of the Hugging Face Space to load, (e.g. "abidlabs/whisper-large-v2") or the full URL (including "http" or "https") of the hosted Gradio app to load (e.g. "http://mydomain.com/app" or "https://bec81a83-5b5c-471e.gradio.live/").
hf_token: The Hugging Face token to use to access private Spaces. Automatically fetched if you are logged in via the Hugging Face Hub CLI.
hf_token: The Hugging Face token to use to access private Spaces. Automatically fetched if you are logged in via the Hugging Face Hub CLI. Obtain from: https://huggingface.co/settings/token
max_workers: The maximum number of thread workers that can be used to make requests to the remote Gradio app simultaneously.
verbose: Whether the client should print statements to the console.
"""
@ -81,14 +85,20 @@ class Client:
)
self.space_id = src
self.src = _src
if self.verbose:
print(f"Loaded as API: {self.src}")
state = self._get_space_state()
if state == utils.BUILDING_RUNTIME:
if self.verbose:
print("Space is still building. Please wait...")
while self._get_space_state() == utils.BUILDING_RUNTIME:
time.sleep(2) # so we don't get rate limited by the API
pass
if state in utils.INVALID_RUNTIME:
raise ValueError(
f"The current space is in the invalid state: {state}. "
"Please contact the owner to fix this."
)
if self.verbose:
print(f"Loaded as API: {self.src}")
self.api_url = urllib.parse.urljoin(self.src, utils.API_URL)
self.ws_url = urllib.parse.urljoin(
@ -110,11 +120,101 @@ class Client:
# Disable telemetry by setting the env variable HF_HUB_DISABLE_TELEMETRY=1
threading.Thread(target=self._telemetry_thread).start()
@classmethod
def duplicate(
    cls,
    from_id: str,
    to_id: str | None = None,
    hf_token: str | None = None,
    private: bool = True,
    hardware: str | None = None,
    secrets: Dict[str, str] | None = None,
    sleep_timeout: int = 5,
    max_workers: int = 40,
    verbose: bool = True,
):
    """
    Duplicates a Hugging Face Space under your account and returns a Client object
    for the new Space. No duplication is created if the Space already exists in your
    account (to override this, provide a new name for the new Space using `to_id`).
    To use this method, you must provide an `hf_token` or be logged in via the Hugging
    Face Hub CLI.

    The new Space will be private by default and use the same hardware as the original
    Space. This can be changed by using the `private` and `hardware` parameters. For
    hardware upgrades (beyond the basic CPU tier), you may be required to provide
    billing information on Hugging Face: https://huggingface.co/settings/billing

    Parameters:
        from_id: The name of the Hugging Face Space to duplicate in the format "{username}/{space_id}", e.g. "gradio/whisper".
        to_id: The name of the new Hugging Face Space to create, e.g. "abidlabs/whisper-duplicate". If not provided, the new Space will be named "{your_HF_username}/{space_id}".
        hf_token: The Hugging Face token to use to access private Spaces. Automatically fetched if you are logged in via the Hugging Face Hub CLI. Obtain from: https://huggingface.co/settings/token
        private: Whether the new Space should be private (True) or public (False). Defaults to True.
        hardware: The hardware tier to use for the new Space. Defaults to the same hardware tier as the original Space. Options include "cpu-basic", "cpu-upgrade", "t4-small", "t4-medium", "a10g-small", "a10g-large", "a100-large", subject to availability.
        secrets: A dictionary of (secret key, secret value) to pass to the new Space. Defaults to None.
        sleep_timeout: The number of minutes after which the duplicate Space will be paused if no requests are made to it (to minimize billing charges). Defaults to 5 minutes.
        max_workers: The maximum number of thread workers that can be used to make requests to the remote Gradio app simultaneously.
        verbose: Whether the client should print statements to the console.
    """
    # Validate that the source Space exists (and is visible with this token).
    try:
        info = huggingface_hub.get_space_runtime(from_id, token=hf_token)
    except RepositoryNotFoundError:
        raise ValueError(
            f"Could not find Space: {from_id}. If it is a private Space, please provide an `hf_token`."
        )
    # Resolve the target repo name under the caller's HF namespace.
    if to_id:
        if "/" in to_id:
            # Only the repo part is kept; the namespace is always the caller's.
            to_id = to_id.split("/")[1]
        space_id = huggingface_hub.get_full_repo_name(to_id, token=hf_token)
    else:
        space_id = huggingface_hub.get_full_repo_name(
            from_id.split("/")[1], token=hf_token
        )
    # Reuse an existing duplicate if one is already present; otherwise create it.
    try:
        huggingface_hub.get_space_runtime(space_id, token=hf_token)
        if verbose:
            print(
                f"Using your existing Space: {utils.SPACE_URL.format(space_id)} 🤗"
            )
    except RepositoryNotFoundError:
        if verbose:
            print(f"Creating a duplicate of {from_id} for your own use... 🤗")
        huggingface_hub.duplicate_space(
            from_id=from_id,
            to_id=space_id,
            token=hf_token,
            exist_ok=True,
            private=private,
        )
        # Auto-sleep the duplicate to minimize billing charges.
        utils.set_space_timeout(
            space_id, hf_token=hf_token, timeout_in_seconds=sleep_timeout * 60
        )
        if verbose:
            print(f"Created new Space: {utils.SPACE_URL.format(space_id)}")
    # Align the duplicate's hardware with the request (or the original Space).
    current_info = huggingface_hub.get_space_runtime(space_id, token=hf_token)
    current_hardware = current_info.hardware or "cpu-basic"
    if hardware is None:
        hardware = info.hardware
    if not current_hardware == hardware:
        huggingface_hub.request_space_hardware(space_id, hardware)  # type: ignore
        print(
            f"-------\nNOTE: this Space uses upgraded hardware: {hardware}... see billing info at https://huggingface.co/settings/billing\n-------"
        )
    # Forward any provided secrets to the new Space.
    if secrets is not None:
        for key, value in secrets.items():
            huggingface_hub.add_space_secret(space_id, key, value, token=hf_token)
    if verbose:
        print("")
    client = cls(
        space_id, hf_token=hf_token, max_workers=max_workers, verbose=verbose
    )
    return client
def _get_space_state(self):
    """
    Return the runtime stage of the connected Space (e.g. "RUNNING", "BUILDING"),
    or None when this client is not connected to a Hugging Face Space.
    """
    if not self.space_id:
        return None
    info = huggingface_hub.get_space_runtime(self.space_id, token=self.hf_token)
    return info.stage
def predict(
self,
@ -520,22 +620,29 @@ class Endpoint:
if self.use_ws:
result = utils.synchronize_async(self._ws_fn, data, hash_data, helper)
output = result["data"]
if "error" in result:
raise ValueError(result["error"])
else:
response = requests.post(
self.client.api_url, headers=self.client.headers, data=data
)
result = json.loads(response.content.decode("utf-8"))
try:
output = result["data"]
except KeyError:
if "error" in result and "429" in result["error"]:
raise utils.TooManyRequestsError(
"Too many requests to the Hugging Face API"
)
raise KeyError(
f"Could not find 'data' key in response. Response received: {result}"
try:
output = result["data"]
except KeyError:
is_public_space = (
self.client.space_id
and not huggingface_hub.space_info(self.client.space_id).private
)
if "error" in result and "429" in result["error"] and is_public_space:
raise utils.TooManyRequestsError(
f"Too many requests to the API, please try again later. To avoid being rate-limited, please duplicate the Space using Client.duplicate({self.client.space_id}) and pass in your Hugging Face token."
)
elif "error" in result:
raise ValueError(result["error"])
raise KeyError(
f"Could not find 'data' key in response. Response received: {result}"
)
return tuple(output)
return _predict
@ -893,7 +1000,7 @@ class Job(Future):
if self.verbose and self.space_id and eta and eta > 30:
print(
f"Due to heavy traffic on this app, the prediction will take approximately {int(eta)} seconds."
f"For faster predictions without waiting in queue, you may duplicate the space: {utils.DUPLICATE_URL.format(self.space_id)}"
f"For faster predictions without waiting in queue, you may duplicate the space using: Client.duplicate({self.space_id})"
)
return self.communicator.job.latest_status

View File

@ -18,6 +18,7 @@ from typing import Any, Callable, Dict, List, Tuple
import fsspec.asyn
import httpx
import huggingface_hub
import requests
from websockets.legacy.protocol import WebSocketCommonProtocol
@ -25,16 +26,16 @@ API_URL = "/api/predict/"
WS_URL = "/queue/join"
UPLOAD_URL = "/upload"
RESET_URL = "/reset"
DUPLICATE_URL = "https://huggingface.co/spaces/{}?duplicate=true"
SPACE_URL = "https://hf.space/{}"
STATE_COMPONENT = "state"
INVALID_RUNTIME = [
"NO_APP_FILE",
"CONFIG_ERROR",
"BUILDING",
"BUILD_ERROR",
"RUNTIME_ERROR",
"PAUSED",
]
BUILDING_RUNTIME = "BUILDING"
__version__ = (pkgutil.get_data(__name__, "version.txt") or b"").decode("ascii").strip()
@ -386,6 +387,26 @@ def file_to_json(file_path: str | Path) -> Dict:
return json.load(f)
###########################
# HuggingFace Hub API Utils
###########################
def set_space_timeout(
    space_id: str,
    hf_token: str | None = None,
    timeout_in_seconds: int = 300,
):
    """
    Set the sleep timeout of a Hugging Face Space via the Hub HTTP API.

    Parameters:
        space_id: Full repo name of the Space, e.g. "username/space-name".
        hf_token: Hugging Face token used to authenticate the request; presumably
            build_hf_headers falls back to the locally saved CLI token when None —
            TODO confirm.
        timeout_in_seconds: Seconds of inactivity after which the Space goes to
            sleep. Defaults to 300 (5 minutes).
    """
    headers = huggingface_hub.utils.build_hf_headers(
        token=hf_token,
        library_name="gradio_client",
        library_version=__version__,
    )
    # NOTE(review): the response is not checked, so a failed request (bad token,
    # missing Space, unsupported hardware tier) is silently ignored.
    requests.post(
        f"https://huggingface.co/api/spaces/{space_id}/sleeptime",
        json={"seconds": timeout_in_seconds},
        headers=headers,
    )
########################
# Misc utils
########################

View File

@ -100,7 +100,7 @@ class TestPredictionsFromSpaces:
self,
):
client = Client(src="gradio/count_generator")
job = client.submit(3, api_name="/count")
job = client.submit(3, fn_index=0)
while not job.done():
time.sleep(0.1)
@ -108,7 +108,7 @@ class TestPredictionsFromSpaces:
assert job.outputs() == [str(i) for i in range(3)]
outputs = []
for o in client.submit(3, api_name="/count"):
for o in client.submit(3, fn_index=0):
outputs.append(o)
assert outputs == [str(i) for i in range(3)]
@ -122,8 +122,8 @@ class TestPredictionsFromSpaces:
@pytest.mark.flaky
def test_timeout(self):
with pytest.raises(TimeoutError):
client = Client(src="gradio/count_generator")
job = client.submit(api_name="/sleep")
client = Client(src="gradio-tests/sleep")
job = client.submit("ping", api_name="/predict")
job.result(timeout=0.05)
@pytest.mark.flaky
@ -543,3 +543,56 @@ class TestEndpoints:
["file5", "file6"],
"file7",
]
class TestDuplication:
    """Tests for Client.duplicate(); Hub runtime lookups and Client.__init__ are mocked."""

    @pytest.mark.flaky
    @patch("huggingface_hub.get_space_runtime", return_value=MagicMock(hardware="cpu"))
    @patch("gradio_client.client.Client.__init__", return_value=None)
    def test_new_space_id(self, mock_init, mock_runtime):
        # An explicit `to_id` resolves to the caller's namespace, whether or not
        # a namespace was included in the argument.
        Client.duplicate("gradio/calculator", "test", hf_token=HF_TOKEN)
        mock_runtime.assert_any_call("gradio/calculator", token=HF_TOKEN)
        mock_runtime.assert_any_call("gradio-tests/test", token=HF_TOKEN)
        mock_init.assert_called_with(
            "gradio-tests/test", hf_token=HF_TOKEN, max_workers=40, verbose=True
        )
        Client.duplicate("gradio/calculator", "gradio-tests/test", hf_token=HF_TOKEN)
        mock_runtime.assert_any_call("gradio/calculator", token=HF_TOKEN)
        mock_runtime.assert_any_call("gradio-tests/test", token=HF_TOKEN)
        mock_init.assert_called_with(
            "gradio-tests/test", hf_token=HF_TOKEN, max_workers=40, verbose=True
        )

    @pytest.mark.flaky
    @patch("huggingface_hub.get_space_runtime", return_value=MagicMock(hardware="cpu"))
    @patch("gradio_client.client.Client.__init__", return_value=None)
    def test_default_space_id(self, mock_init, mock_runtime):
        # Without `to_id`, the duplicate keeps the source repo name under the
        # caller's namespace.
        Client.duplicate("gradio/calculator", hf_token=HF_TOKEN)
        mock_runtime.assert_any_call("gradio/calculator", token=HF_TOKEN)
        mock_runtime.assert_any_call("gradio-tests/calculator", token=HF_TOKEN)
        mock_init.assert_called_with(
            "gradio-tests/calculator", hf_token=HF_TOKEN, max_workers=40, verbose=True
        )

    @pytest.mark.flaky
    @patch("huggingface_hub.get_space_runtime", return_value=MagicMock(hardware="cpu"))
    @patch("huggingface_hub.add_space_secret")
    @patch("gradio_client.client.Client.__init__", return_value=None)
    def test_add_secrets(self, mock_init, mock_add_secret, mock_runtime):
        # Each (key, value) pair in `secrets` is forwarded to the new Space.
        Client.duplicate(
            "gradio/calculator",
            hf_token=HF_TOKEN,
            secrets={"test_key": "test_value", "test_key2": "test_value2"},
        )
        mock_add_secret.assert_any_call(
            "gradio-tests/calculator",
            "test_key",
            "test_value",
            token=HF_TOKEN,
        )
        mock_add_secret.assert_any_call(
            "gradio-tests/calculator",
            "test_key2",
            "test_value2",
            token=HF_TOKEN,
        )