refactor llm run script
amarlearning committed Jun 8, 2024
1 parent 3c0622c commit ebb50c5
Showing 2 changed files with 48 additions and 120 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
@@ -5,7 +5,7 @@ ADD . /app
 
 RUN apt-get update && apt-get install -y python3 python3-pip
 
-RUN apt-get update && apt-get install -y curl git make
+RUN apt-get update && apt-get install -y curl
 
 RUN pip install poetry
 
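With git and make dropped, curl is the only extra utility the image installs for the run script. A quick smoke test of the slimmed image (an editor's sketch, not part of the commit; the tag llm-runner is made up):

# build the image from the repo root, then confirm the tools the script
# still relies on are present inside the container
docker build -t llm-runner .
docker run --rm llm-runner sh -c 'command -v curl && command -v python3 && poetry --version'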
166 changes: 47 additions & 119 deletions scripts/run-llm.sh
@@ -9,19 +9,11 @@
 
 set -e
 
-# required utils: curl, git, make
+# required utils: curl
 if ! command -v curl &> /dev/null; then
     printf "[-] curl not found\n"
     exit 1
 fi
-if ! command -v git &> /dev/null; then
-    printf "[-] git not found\n"
-    exit 1
-fi
-if ! command -v make &> /dev/null; then
-    printf "[-] make not found\n"
-    exit 1
-fi
 
 # parse arguments
 port=8080
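Only the curl check survives the refactor. If more required utilities ever come back, a loop keeps the check compact; a minimal sketch (not in the commit), mirroring the script's existing printf style:

# add tool names to the list as dependencies return
for tool in curl; do
    if ! command -v "$tool" &> /dev/null; then
        printf "[-] %s not found\n" "$tool"
        exit 1
    fi
done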
@@ -45,68 +37,9 @@ n_kv=4096
 verbose=0
 log_prompts=0
 log_stat=0
-# 0: server mode
-# 1: local mode
-# mode=0
-# 0: non-interactive
-# 1: interactive
-interactive=0
-model=""
-# ggml version: latest or bxxxx
-ggml_version="latest"
-
-function print_usage {
-    printf "Usage:\n"
-    printf "  ./run-llm.sh [--port]\n\n"
-    printf "  --model: model name\n"
-    printf "  --interactive: run in interactive mode\n"
-    printf "  --port: port number, default is 8080\n"
-    printf "  --ggml-version: ggml version, default is latest\n"
-    printf "Example:\n\n"
-    printf '  bash <(curl -sSfL 'https://code.flows.network/webhook/iwYN1SdN3AmPgR5ao5Gt/run-llm.sh')"\n\n'
-}
-
-while [[ $# -gt 0 ]]; do
-    key="$1"
-    case $key in
-        --model)
-            model="$2"
-            shift
-            shift
-            ;;
-        --interactive)
-            interactive=1
-            shift
-            ;;
-        --port)
-            port="$2"
-            shift
-            shift
-            ;;
-        --ggml-version)
-            ggml_version="$2"
-            shift
-            shift
-            ;;
-        --help)
-            print_usage
-            exit 0
-            ;;
-        *)
-            echo "Unknown argument: $key"
-            print_usage
-            exit 1
-            ;;
-    esac
-done
-
-# available weights types
-wtypes=("Q2_K" "Q3_K_L" "Q3_K_M" "Q3_K_S" "Q4_0" "Q4_K_M" "Q4_K_S" "Q5_0" "Q5_K_M" "Q5_K_S" "Q6_K" "Q8_0")
-
-wfiles=()
-for wt in "${wtypes[@]}"; do
-    wfiles+=("")
-done
-
 ss_urls=(
     "https://huggingface.co/SanctumAI/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/meta-llama-3-8b-instruct.Q8_0.gguf"
@@ -122,53 +55,48 @@ prompt_types=(
     "llama-3-chat"
 )
 
-if [ "$interactive" -eq 0 ]; then
-
-    printf "\n"
-    # * install WasmEdge + wasi-nn_ggml plugin
-    printf "[+] Installing WasmEdge with wasi-nn_ggml plugin ...\n\n"
-
-    if [ "$ggml_version" = "latest" ]; then
-        if curl -sSf https://raw.githubusercontent.com/WasmEdge/WasmEdge/5b7a8deaacc1382d77676537daeb93fc9db8c756/utils/install_v2.sh | bash -s -- -v 0.13.5 --rustls; then
-            source $HOME/.wasmedge/env
-            wasmedge_path=$(which wasmedge)
-            printf "\n    The WasmEdge Runtime is installed in %s.\n\n" "$wasmedge_path"
-        else
-            echo "Failed to install WasmEdge"
-            exit 1
-        fi
-    else
-        ggml_plugin="wasi_nn-ggml-$ggml_version"
-        if curl -sSf https://raw.githubusercontent.com/WasmEdge/WasmEdge/master/utils/install.sh | bash -s -- -v 0.13.5 --plugins $ggml_plugin wasmedge_rustls; then
-            source $HOME/.wasmedge/env
-            wasmedge_path=$(which wasmedge)
-            printf "\n    The WasmEdge Runtime is installed in %s.\n\n" "$wasmedge_path"
-        else
-            echo "Failed to install WasmEdge"
-            exit 1
-        fi
-    fi
+# * install WasmEdge + wasi-nn_ggml plugin
+printf "[+] Installing WasmEdge with wasi-nn_ggml plugin ...\n\n"
 
-    printf "\n"
+if curl -sSf https://raw.githubusercontent.com/WasmEdge/WasmEdge/5b7a8deaacc1382d77676537daeb93fc9db8c756/utils/install_v2.sh | bash -s -- -v 0.13.5 --rustls; then
+    source $HOME/.wasmedge/env
+    wasmedge_path=$(which wasmedge)
+    printf "\n    The WasmEdge Runtime is installed in %s.\n\n" "$wasmedge_path"
+else
+    echo "Failed to install WasmEdge"
+    exit 1
+fi
 
-    # * download meta-llama-3-8b-instruct.Q8_0.gguf
-    ss_url="https://huggingface.co/SanctumAI/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/meta-llama-3-8b-instruct.Q8_0.gguf"
-    wfile=$(basename "$ss_url")
-    if [ -f "$wfile" ]; then
-        printf "[+] Using cached model %s \n" "$wfile"
-    else
-        printf "[+] Downloading %s ...\n" "$ss_url"
-
-        # download the weights file
-        curl -o "$wfile" -# -L "$ss_url"
-    fi
+printf "\n"
 
-    # * download llama-api-server.wasm
-    printf "[+] Downloading the latest llama-api-server.wasm ...\n"
-    curl -LO https://github.com/LlamaEdge/LlamaEdge/releases/latest/download/llama-api-server.wasm
-    printf "\n"
+# * download meta-llama-3-8b-instruct.Q8_0.gguf
+ss_url="https://huggingface.co/SanctumAI/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/meta-llama-3-8b-instruct.Q8_0.gguf"
+wfile=$(basename "$ss_url")
+if [ -f "$wfile" ]; then
+    printf "[+] Using cached model %s \n" "$wfile"
+else
+    printf "[+] Downloading %s ...\n" "$ss_url"
 
-    # * download chatbot-ui
+    # download the weights file
+    curl -o "$wfile" -# -L "$ss_url"
+fi
+
+# * download llama-api-server.wasm
+wasm_url="https://github.com/LlamaEdge/LlamaEdge/releases/latest/download/llama-api-server.wasm"
+wasmfile=$(basename "$wasm_url")
+if [ -f "$wasmfile" ]; then
+    printf "[+] Using cached file %s \n" "$wasmfile"
+else
+    printf "[+] Downloading %s ...\n" "$wasm_url"
+
+    # download the weights file
+    curl -o "$wasmfile" -# -L "$wasm_url"
+fi
+
+# * download chatbot-ui
+if [ ! -d "chatbot-ui" ]; then
     printf "[+] Downloading Chatbot web app ...\n"
    files_tarball="https://github.com/second-state/chatbot-ui/releases/latest/download/chatbot-ui.tar.gz"
     curl -LO $files_tarball
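After the refactor, the model and llama-api-server.wasm downloads follow the same cache-or-fetch shape. A sketch of a shared helper using the same printf conventions (an editorial suggestion, not what the commit does):

# download a file by URL unless a cached copy already sits in the
# working directory; file name is derived from the URL
fetch_cached() {
    local url="$1"
    local file
    file=$(basename "$url")
    if [ -f "$file" ]; then
        printf "[+] Using cached %s \n" "$file"
    else
        printf "[+] Downloading %s ...\n" "$url"
        curl -o "$file" -# -L "$url"
    fi
}

fetch_cached "https://huggingface.co/SanctumAI/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/meta-llama-3-8b-instruct.Q8_0.gguf"
fetch_cached "https://github.com/LlamaEdge/LlamaEdge/releases/latest/download/llama-api-server.wasm"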
@@ -179,17 +107,17 @@ if [ "$interactive" -eq 0 ]; then
     tar xzf chatbot-ui.tar.gz
     rm chatbot-ui.tar.gz
     printf "\n"
-
-    # * start llama-api-server
-    cmd="wasmedge --dir .:. --nn-preload default:GGML:AUTO:meta-llama-3-8b-instruct.Q8_0.gguf llama-api-server.wasm -p llama-3-chat -c 8192 --model-name meta-llama-3-8b-instruct --socket-addr 0.0.0.0:${port} --log-prompts --log-stat"
-
-    printf "[+] Will run the following command to start the server:\n\n"
-    printf "    %s\n\n" "$cmd"
-    printf "    Chatbot web app can be accessed at http://0.0.0.0:%s after the server is started\n\n\n" "$port"
-    printf "*********************************** LlamaEdge API Server ********************************\n\n"
-    eval $cmd
 else
-    echo "Invalid value for interactive"
+    printf "[+] Using cached Chatbot web app. Skipping download.\n"
 fi
 
+# * start llama-api-server
+cmd="wasmedge --dir .:. --nn-preload default:GGML:AUTO:meta-llama-3-8b-instruct.Q8_0.gguf llama-api-server.wasm -p llama-3-chat -c 8192 --model-name meta-llama-3-8b-instruct --socket-addr 0.0.0.0:${port} --log-prompts --log-stat"
+
+printf "[+] Will run the following command to start the server:\n\n"
+printf "    %s\n\n" "$cmd"
+printf "    Chatbot web app can be accessed at http://0.0.0.0:%s after the server is started\n\n\n" "$port"
+printf "*********************************** LlamaEdge API Server ********************************\n\n"
+eval $cmd
 
 exit 0
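End to end, the refactored script runs with no arguments: it installs WasmEdge 0.13.5, fetches the model, server wasm, and chatbot UI (reusing cached copies), then starts the server. Assuming a checkout of this repository:

bash scripts/run-llm.sh
# then browse to http://0.0.0.0:8080 once the "LlamaEdge API Server" banner is printed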
