mirror of
https://github.com/cocktailpeanut/dalai
synced 2025-03-06 18:53:01 +01:00
dalai alpaca
This commit is contained in:
parent
607f17af29
commit
a4cbf1c73c
80
alpaca.js
Normal file
@ -0,0 +1,80 @@
|
||||
const path = require('path');
|
||||
const term = require( 'terminal-kit' ).terminal;
|
||||
const git = require('isomorphic-git');
|
||||
const Downloader = require("nodejs-file-downloader");
|
||||
const http = require('isomorphic-git/http/node');
|
||||
const os = require('os');
|
||||
const fs = require("fs");
|
||||
const platform = os.platform()
|
||||
class Alpaca {
|
||||
constructor(root) {
|
||||
this.root = root
|
||||
this.home = path.resolve(this.root.home, "alpaca")
|
||||
this.url = "https://github.com/cocktailpeanut/alpaca.cpp.git"
|
||||
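// alpaca.cpp builds a single "chat" binary, so all platforms share the same launcher name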
this.launcher = {
|
||||
win32: "chat",
|
||||
linux: "chat",
|
||||
darwin: "chat",
|
||||
}
|
||||
}
|
||||
async make() {
|
||||
let success
|
||||
if (platform === "win32") {
|
||||
// CMake on Windows
|
||||
const venv_path = path.join(this.root.home, "venv")
|
||||
const cmake_path = path.join(venv_path, "Scripts", "cmake")
|
||||
await this.root.exec("mkdir build", this.home)
|
||||
await this.root.exec(`Remove-Item -path ${path.resolve(this.home, "build", "CMakeCache.txt")}`, this.home)
|
||||
await this.root.exec(`make chat`, this.home)
|
||||
} else {
|
||||
// Make on linux + mac
|
||||
success = await this.root.exec(`make chat`, this.home)
|
||||
if (!success) {
|
||||
throw new Error("running 'make' failed")
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
async get (...models) {
|
||||
for(let model of models) {
|
||||
const venv_path = path.join(this.root.home, "venv")
|
||||
const python_path = platform == "win32" ? path.join(venv_path, "Scripts", "python.exe") : path.join(venv_path, 'bin', 'python')
|
||||
/**************************************************************************************************************
|
||||
*
|
||||
* 5. Download models + convert + quantize
|
||||
*
|
||||
**************************************************************************************************************/
|
||||
const outputFile = path.resolve(this.home, 'models', model, 'ggml-model-q4_0.bin')
|
||||
if (fs.existsSync(outputFile)) {
|
||||
console.log(`Skip conversion, file already exists: ${outputFile}`)
|
||||
} else {
|
||||
const task = `downloading ${outputFile}`
|
||||
const downloader = new Downloader({
|
||||
url: "https://gateway.estuary.tech/gw/ipfs/QmQ1bf2BTnYxq73MFJWu1B7bQ2UD6qG7D7YDCxhTndVkPC",
|
||||
//url: "https://ipfs.io/ipfs/QmQ1bf2BTnYxq73MFJWu1B7bQ2UD6qG7D7YDCxhTndVkPC",
|
||||
//url: `https://cloudflare-ipfs.com/ipfs/QmQ1bf2BTnYxq73MFJWu1B7bQ2UD6qG7D7YDCxhTndVkPC`,
|
||||
fileName: 'ggml-model-q4_0.bin',
|
||||
directory: path.resolve(this.home, "models", model),
|
||||
maxAttempts: 3, //Default is 1.
|
||||
onError: function (error) {
|
||||
//You can also hook into each failed attempt.
|
||||
console.log("Error from attempt ", error);
|
||||
},
|
||||
onProgress: (percentage, chunk, remainingSize) => {
|
||||
this.root.progress(task, percentage)
|
||||
},
|
||||
});
|
||||
try {
|
||||
await this.root.startProgress(task)
|
||||
await downloader.download();
|
||||
} catch (error) {
|
||||
console.log(error);
|
||||
}
|
||||
this.root.progressBar.update(1);
|
||||
term("\n")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
module.exports = Alpaca
|
63
bin/cli.js
@ -6,37 +6,42 @@ if (process.argv.length > 0) {
|
||||
if (cmd === "serve") {
|
||||
const port = (args.length > 0 ? parseInt(args[0]) : 3000)
|
||||
Web(port)
|
||||
} else if (cmd === "llama" || cmd === "install") {
|
||||
if (args.length === 0) args = ["7B"]
|
||||
for(let arg of args) {
|
||||
if (!["7B", "13B", "30B", "65B"].includes(arg)) {
|
||||
console.log(`##########################################################
|
||||
#
|
||||
# ERROR
|
||||
# The arguments must be one or more of the following:
|
||||
#
|
||||
# 7B, 13B, 30B, 65B
|
||||
#
|
||||
##########################################################
|
||||
|
||||
[Example]
|
||||
|
||||
# install just 7B (default)
|
||||
npx dalai install
|
||||
|
||||
# install 7B manually
|
||||
npx dalai install 7B
|
||||
|
||||
# install 7B and 13B
|
||||
npx dalai install 7B 13B
|
||||
`)
|
||||
process.exit(1)
|
||||
break;
|
||||
}
|
||||
}
|
||||
new Dalai().install(...args).then(() => {
|
||||
} else if (cmd === "setup") {
|
||||
new Dalai().setup().then(() => {
|
||||
process.exit(0)
|
||||
}).catch((e) => {
|
||||
console.log("Error", e)
|
||||
process.exit(1)
|
||||
})
|
||||
} else {
|
||||
if (args.length > 0) {
|
||||
let core = cmd
|
||||
let [method, ...callparams] = args
|
||||
let dalai = new Dalai()
|
||||
console.log({ method, callparams })
|
||||
// 1. install => install the core module
|
||||
// 2. get => get models
|
||||
dalai[method](core, ...callparams).then(() => {
|
||||
process.exit(0)
|
||||
}).catch((e) => {
|
||||
console.log("ERROR", e)
|
||||
process.exit(1)
|
||||
})
|
||||
} else {
|
||||
console.log("############################################")
|
||||
console.log("#")
|
||||
console.log("# Supported Commands:")
|
||||
console.log("#")
|
||||
console.log("# 1. System command")
|
||||
console.log("#")
|
||||
console.log("# dalai serve <port (optional)>")
|
||||
console.log("#")
|
||||
console.log("# 2. Model command")
|
||||
console.log("#")
|
||||
console.log("# dalai llama get <model names>")
|
||||
console.log("#")
|
||||
console.log("############################################")
|
||||
}
|
||||
}
|
||||
} else {
|
||||
console.log("ERROR: Please pass a command")
|
||||
|
@ -59,11 +59,11 @@
|
||||
flex-grow: 1;
|
||||
}
|
||||
input[type=text] , select {
|
||||
margin-right: 5px;
|
||||
border: none;
|
||||
background: rgba(0,0,0,0.08);
|
||||
padding: 5px 10px;
|
||||
box-sizing: border-box;
|
||||
width: 100px;
|
||||
}
|
||||
.logo {
|
||||
font-weight: bold;
|
||||
@ -83,6 +83,7 @@
|
||||
.kv {
|
||||
display: block;
|
||||
font-size: 14px;
|
||||
margin-left: 10px;
|
||||
}
|
||||
.kv label {
|
||||
display: block;
|
||||
@ -111,13 +112,13 @@
|
||||
const config = {
|
||||
seed: -1,
|
||||
threads: 4,
|
||||
n_predict: 1000,
|
||||
model: "7B",
|
||||
n_predict: 200,
|
||||
top_k: 40,
|
||||
top_p: 0.9,
|
||||
temp: 0.8,
|
||||
repeat_last_n: 64,
|
||||
repeat_penalty: 1.3,
|
||||
debug: false,
|
||||
models: []
|
||||
}
|
||||
const socket = io();
|
||||
@ -125,17 +126,27 @@ const form = document.getElementById('form');
|
||||
const input = document.querySelector('#input');
|
||||
const model = document.querySelector('#model');
|
||||
const renderHeader = (config) => {
|
||||
const fields = ["n_predict", "repeat_last_n", "repeat_penalty", "top_k", "top_p", "temp", "seed"].map((key) => {
|
||||
return `<div class='kv'>
|
||||
const fields = [{ key: "debug", type: "checkbox" }, "n_predict", "repeat_last_n", "repeat_penalty", "top_k", "top_p", "temp", "seed"].map((key) => {
|
||||
if (typeof key === "string") {
|
||||
return `<div class='kv'>
|
||||
<label>${key}</label>
|
||||
<input name="${key}" type='text' placeholder="${key}" value="${config[key] || ''}">
|
||||
</div>`
|
||||
} else {
|
||||
if (key.type === "checkbox") {
|
||||
return `<div class='kv'>
|
||||
<label>${key.key}</label>
|
||||
<input name="${key.key}" type='checkbox' ${config[key.key] ? "checked" : ""}>
|
||||
</div>`
|
||||
}
|
||||
}
|
||||
|
||||
}).join("")
|
||||
|
||||
|
||||
const models = config.models.map((model) => {
|
||||
return `<option value="7B" ${config.model === model ? "selected" : ""}>${model}</option>`
|
||||
config.model = config.models[0]
|
||||
const models = config.models.map((model, i) => {
|
||||
return `<option value="${model}" ${i === 0 ? "selected" : ""}>${model}</option>`
|
||||
}).join("")
|
||||
return `<a class='logo' href="/">Dalai</a><div class='stretch'></div>
|
||||
<div class='config-container'>
|
||||
@ -158,6 +169,8 @@ const loading = (on) => {
|
||||
document.querySelector("form").addEventListener("input", (e) => {
|
||||
if (e.target.tagName === "SELECT") {
|
||||
config[e.target.name] = config.models[e.target.selectedIndex]
|
||||
} else if (e.target.type === "checkbox") {
|
||||
config[e.target.name] = e.target.checked
|
||||
} else {
|
||||
config[e.target.name] = e.target.value
|
||||
}
|
||||
@ -203,6 +216,7 @@ socket.emit('request', {
|
||||
})
|
||||
socket.on('result', async ({ request, response }) => {
|
||||
loading(false)
|
||||
console.log(response)
|
||||
if (request.method === "installed") {
|
||||
if (response == "\n\n<end>") {
|
||||
document.querySelector(".form-header").innerHTML = renderHeader(config)
|
||||
|
391
docs/README.md
@ -1,6 +1,6 @@
|
||||
# Dalai
|
||||
|
||||
Run LLaMA on your computer.
|
||||
Run LLaMA and Alpaca on your computer.
|
||||
|
||||
<a href="https://github.com/cocktailpeanut/dalai" class='inverse btn'><i class="fa-brands fa-github"></i> Github</a>
|
||||
<a href="https://twitter.com/cocktailpeanut" class='inverse btn'><i class="fa-brands fa-twitter"></i> Twitter</a>
|
||||
@ -8,17 +8,23 @@ Run LLaMA on your computer.
|
||||
|
||||
---
|
||||
|
||||
#### JUST RUN THIS:
|
||||
## JUST RUN THIS
|
||||
|
||||
<img src="terminal.png" class='round'>
|
||||
<img src="alpa.png" class='round'>
|
||||
|
||||
#### TO GET:
|
||||
or
|
||||
|
||||

|
||||
<img src="llam.png" class='round'>
|
||||
|
||||
## TO GET
|
||||
|
||||
Both alpaca and llama working on your computer!
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
1. Powered by [llama.cpp](https://github.com/ggerganov/llama.cpp) and [llama-dl CDN](https://github.com/shawwn/llama-dl)
|
||||
1. Powered by [llama.cpp](https://github.com/ggerganov/llama.cpp), [llama-dl CDN](https://github.com/shawwn/llama-dl), and [alpaca.cpp](https://github.com/antimatter15/alpaca.cpp)
|
||||
2. Hackable web app included
|
||||
3. Ships with JavaScript API
|
||||
4. Ships with [Socket.io](https://socket.io/) API
|
||||
@ -41,6 +47,19 @@ Runs on most modern computers. Unless your computer is very very old, it should
|
||||
|
||||
## 3. Disk Space Requirements
|
||||
|
||||
### Alpaca
|
||||
|
||||
Currently only the 7B model is available via [alpaca.cpp](https://github.com/antimatter15/alpaca.cpp)
|
||||
|
||||
#### 7B
|
||||
|
||||
Alpaca comes fully quantized (compressed), and the only space you need for the 7B model is 4.21GB:
|
||||
|
||||

|
||||
|
||||
|
||||
### LLaMA
|
||||
|
||||
You need a lot of space for storing the models.
|
||||
|
||||
You do NOT have to install all models, you can install one by one. Let's take a look at how much space each model takes up:
|
||||
@ -51,28 +70,28 @@ You do NOT have to install all models, you can install one by one. Let's take a
|
||||
>
|
||||
> You can optimize this if you delete the original models (which are much larger) after installation and keep only the quantized versions.
|
||||
|
||||
### 7B
|
||||
#### 7B
|
||||
|
||||
- Full: The model takes up 31.17GB
|
||||
- Quantized: 4.21GB
|
||||
|
||||

|
||||
|
||||
### 13B
|
||||
#### 13B
|
||||
|
||||
- Full: The model takes up 60.21GB
|
||||
- Quantized: 4.07GB * 2 = 8.14GB
|
||||
|
||||

|
||||
|
||||
### 30B
|
||||
#### 30B
|
||||
|
||||
- Full: The model takes up 150.48GB
|
||||
- Quantized: 5.09GB * 4 = 20.36GB
|
||||
|
||||

|
||||
|
||||
### 65B
|
||||
#### 65B
|
||||
|
||||
- Full: The model takes up 432.64GB
|
||||
- Quantized: 5.11GB * 8 = 40.88GB
|
||||
@ -91,28 +110,67 @@ You do NOT have to install all models, you can install one by one. Let's take a
|
||||
|
||||
### Step 2. Install Dalai
|
||||
|
||||
Basic install (7B model only)
|
||||
First install dalai:
|
||||
|
||||
```
|
||||
npx dalai llama
|
||||
npm install -g dalai
|
||||
```
|
||||
|
||||
Or, install all models
|
||||
### Step 3. Install Engines
|
||||
|
||||
Currently supported engines are `llama` and `alpaca`.
|
||||
|
||||
#### Install LLaMA
|
||||
|
||||
To install `llama`, run:
|
||||
|
||||
```
|
||||
npx dalai llama 7B 13B 30B 65B
|
||||
dalai llama install
|
||||
```
|
||||
|
||||
The install command :
|
||||
#### Install Alpaca
|
||||
|
||||
To install `alpaca`, run:
|
||||
|
||||
```
|
||||
dalai alpaca install
|
||||
```
|
||||
|
||||
### Step 4. Get Models
|
||||
|
||||
#### Download LLaMA models
|
||||
|
||||
To download llama models, you can run:
|
||||
|
||||
```
|
||||
dalai llama get 7B
|
||||
```
|
||||
|
||||
|
||||
or to download multiple models:
|
||||
|
||||
```
|
||||
dalai llama get 7B 13B
|
||||
```
|
||||
|
||||
#### Download Alpaca models
|
||||
|
||||
Currently alpaca only has the 7B model:
|
||||
|
||||
```
|
||||
dalai alpaca get 7B
|
||||
```
|
||||
|
||||
1. Creates a folder named `dalai` under your home directory (`~`)
|
||||
2. Installs and builds the [llama.cpp](https://github.com/ggerganov/llama.cpp) project under `~/llama.cpp`
|
||||
3. Downloads all the requested models from the [llama-dl CDN](https://github.com/shawwn/llama-dl) to `~/llama.cpp/models`
|
||||
4. Runs some tasks to convert the LLaMA models so they can be used
|
||||
|
||||
### Step 3. Run Web UI
|
||||
|
||||
After everything has been installed, open http://localhost:3000 in your browser. Have fun!
|
||||
After everything has been installed, run the following command to launch the web UI server:
|
||||
|
||||
```
|
||||
dalai serve
|
||||
```
|
||||
|
||||
and open http://localhost:3000 in your browser. Have fun!
|
||||
|
||||
---
|
||||
|
||||
@ -126,8 +184,6 @@ Press the button below to visit the Visual Studio downloads page and download:
|
||||
|
||||
<a href="https://visualstudio.microsoft.com/downloads/" class='btn'>Download Microsoft Visual Studio</a>
|
||||
|
||||
---
|
||||
|
||||
**IMPORTANT!!!**
|
||||
|
||||
When installing Visual Studio, make sure to check the 3 options as highlighted below:
|
||||
@ -138,32 +194,22 @@ When installing Visual Studio, make sure to check the 3 options as highlighted b
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
### Step 2.1. Install Dalai
|
||||
|
||||
Basic install (7B model only)
|
||||
First install dalai:
|
||||
|
||||
```
|
||||
npx dalai llama
|
||||
npm install -g dalai
|
||||
```
|
||||
|
||||
Or, install all models
|
||||
|
||||
```
|
||||
npx dalai llama 7B 13B 30B 65B
|
||||
```
|
||||
|
||||
The install command :
|
||||
|
||||
1. Creates a folder named `dalai` under your home directory (`~`)
|
||||
2. Installs and builds the [llama.cpp](https://github.com/ggerganov/llama.cpp) project under `~/llama.cpp`
|
||||
3. Downloads all the requested models from the [llama-dl CDN](https://github.com/shawwn/llama-dl) to `~/llama.cpp/models`
|
||||
4. Runs some tasks to convert the LLaMA models so they can be used
|
||||
|
||||
If this worked without any errors, go to step 3.
|
||||
|
||||
Otherwise, try the troubleshooting steps below:
|
||||
|
||||
---
|
||||
|
||||
### Step 2.2. Troubleshoot (optional)
|
||||
|
||||
In case the above steps fail, try installing node.js and python separately.
|
||||
@ -181,44 +227,222 @@ After both have been installed, open powershell and type `python` to see if the
|
||||
Once you've checked that they both exist, try the `npx dalai llama` command again.
|
||||
|
||||
|
||||
### Step 3. Run Web UI
|
||||
---
|
||||
|
||||
|
||||
### Step 3. Install Engines
|
||||
|
||||
Currently supported engines are `llama` and `alpaca`.
|
||||
|
||||
#### Install LLaMA
|
||||
|
||||
To install `llama`, run:
|
||||
|
||||
```
|
||||
dalai llama install
|
||||
```
|
||||
|
||||
#### Install Alpaca
|
||||
|
||||
To install `alpaca`, run:
|
||||
|
||||
```
|
||||
dalai alpaca install
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Step 4. Get Models
|
||||
|
||||
#### Download LLaMA models
|
||||
|
||||
To download llama models, you can run:
|
||||
|
||||
```
|
||||
dalai llama get 7B
|
||||
```
|
||||
|
||||
|
||||
or to download multiple models:
|
||||
|
||||
```
|
||||
dalai llama get 7B 13B
|
||||
```
|
||||
|
||||
#### Download Alpaca models
|
||||
|
||||
Currently alpaca only has the 7B model:
|
||||
|
||||
```
|
||||
dalai alpaca get 7B
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Step 5. Run Web UI
|
||||
|
||||
After everything has been installed, run the following command to launch the web UI server:
|
||||
|
||||
```
|
||||
dalai serve
|
||||
```
|
||||
|
||||
and open http://localhost:3000 in your browser. Have fun!
|
||||
|
||||
After everything has been installed, open http://localhost:3000 in your browser. Have fun!
|
||||
|
||||
---
|
||||
|
||||
|
||||
## Linux
|
||||
|
||||
### Step 1. Install
|
||||
### Step 1. Install Dependencies
|
||||
|
||||
After everything has been installed, open http://localhost:3000 in your browser. Have fun!
|
||||
You need to make sure you have the correct version of Python and Node.js installed.
|
||||
|
||||
Basic install (7B model only)
|
||||
#### Step 1.1. Python <= 3.10
|
||||
|
||||
<a href="https://pimylifeup.com/installing-python-on-linux/" class='btn'>Download node.js</a>
|
||||
|
||||
> Make sure the version is 3.10 or lower (not 3.11)
|
||||
Python must be 3.10 or below (PyTorch and the other required libraries do not yet support newer versions)
|
||||
|
||||
|
||||
|
||||
#### Step 1.2. Node.js >= 18
|
||||
|
||||
<a href="https://nodejs.org/en/download/package-manager/" class='btn'>Download node.js</a>
|
||||
|
||||
> Make sure the version is 18 or higher
|
||||
|
||||
|
||||
|
||||
### Step 2. Install Dalai
|
||||
|
||||
First install dalai:
|
||||
|
||||
```
|
||||
npx dalai llama
|
||||
npm install -g dalai
|
||||
```
|
||||
|
||||
Or, install all models
|
||||
### Step 3. Install Engines
|
||||
|
||||
Currently supported engines are `llama` and `alpaca`.
|
||||
|
||||
#### Install LLaMA
|
||||
|
||||
To install `llama`, run:
|
||||
|
||||
```
|
||||
npx dalai llama 7B 13B 30B 65B
|
||||
dalai llama install
|
||||
```
|
||||
|
||||
The install command :
|
||||
#### Install Alpaca
|
||||
|
||||
1. Creates a folder named `dalai` under your home directory (`~`)
|
||||
2. Installs and builds the [llama.cpp](https://github.com/ggerganov/llama.cpp) project under `~/llama.cpp`
|
||||
3. Downloads all the requested models from the [llama-dl CDN](https://github.com/shawwn/llama-dl) to `~/llama.cpp/models`
|
||||
4. Runs some tasks to convert the LLaMA models so they can be used
|
||||
To install `alpaca`, run:
|
||||
|
||||
### Step 2. Run Web UI
|
||||
```
|
||||
dalai alpaca install
|
||||
```
|
||||
|
||||
### Step 4. Get Models
|
||||
|
||||
#### Download LLaMA models
|
||||
|
||||
To download llama models, you can run:
|
||||
|
||||
```
|
||||
dalai llama get 7B
|
||||
```
|
||||
|
||||
|
||||
or to download multiple models:
|
||||
|
||||
```
|
||||
dalai llama get 7B 13B
|
||||
```
|
||||
|
||||
#### Download Alpaca models
|
||||
|
||||
Currently alpaca only has the 7B model:
|
||||
|
||||
```
|
||||
dalai alpaca get 7B
|
||||
```
|
||||
|
||||
|
||||
### Step 3. Run Web UI
|
||||
|
||||
After everything has been installed, run the following command to launch the web UI server:
|
||||
|
||||
```
|
||||
dalai serve
|
||||
```
|
||||
|
||||
and open http://localhost:3000 in your browser. Have fun!
|
||||
|
||||
After everything has been installed, open http://localhost:3000 in your browser. Have fun!
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Commands
|
||||
|
||||
## 1. install
|
||||
|
||||
### LLaMA
|
||||
|
||||
Install the core engine for the model
|
||||
|
||||
```
|
||||
dalai llama install
|
||||
```
|
||||
|
||||
### Alpaca
|
||||
|
||||
Install the core engine for the model
|
||||
|
||||
```
|
||||
dalai alpaca install
|
||||
```
|
||||
|
||||
## 2. get
|
||||
|
||||
Download the full LLaMA model and convert and compress them
|
||||
|
||||
### LLaMA
|
||||
|
||||
Download one model:
|
||||
|
||||
```
|
||||
dalai llama get 7B
|
||||
```
|
||||
|
||||
Download multiple models:
|
||||
|
||||
```
|
||||
dalai llama get 7B 13B
|
||||
```
|
||||
|
||||
### Alpaca
|
||||
|
||||
Currently only 7B available:
|
||||
|
||||
```
|
||||
dalai alpaca get 7B
|
||||
```
|
||||
|
||||
|
||||
## 3. serve
|
||||
|
||||
Start a dalai server and an API endpoint (powered by socket.io)
|
||||
|
||||
```
|
||||
dalai serve
|
||||
```
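
For reference, here is a minimal client sketch for the socket.io endpoint. The `request`/`result` event names, the `{ request, response }` payload, and the `\n\n<end>` terminator are taken from the bundled web UI in this commit; the standalone `socket.io-client` usage is an assumption.

```javascript
// hypothetical standalone client for `dalai serve`
const io = require("socket.io-client");
const socket = io("ws://localhost:3000");

// ask an installed model (e.g. alpaca.7B) to complete a prompt
socket.emit("request", { model: "alpaca.7B", prompt: "What is an alpaca?", n_predict: 128 });

// tokens arrive as `result` events; "\n\n<end>" marks the end of the stream
socket.on("result", ({ request, response }) => {
  if (response === "\n\n<end>") socket.close();
  else process.stdout.write(response);
});
```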
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# API
|
||||
|
||||
Dalai is also an NPM package:
|
||||
@ -281,7 +505,8 @@ dalai.request(req, callback)
|
||||
|
||||
- `req`: a request object. made up of the following attributes:
|
||||
- `prompt`: **(required)** The prompt string
|
||||
- `model`: **(required)** The model name to query ("7B", "13B", etc.)
|
||||
- `model`: **(required)** The model type + model name to query. Takes the following form: `<model_type>.<model_name>`
|
||||
- Example: `alpaca.7B`, `llama.13B`, ...
|
||||
- `url`: only needed if connecting to a remote dalai server
|
||||
- if unspecified, it uses the node.js API to directly run dalai locally
|
||||
- if specified (for example `ws://localhost:3000`) it looks for a socket.io endpoint at the URL and connects to it.
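
Putting these attributes together, a minimal local sketch looks like the following (no `url`, so dalai runs directly on this machine; the prompt and sampling values are placeholders, and it assumes the `alpaca` engine and its 7B model have already been installed):

```javascript
const Dalai = require("dalai");
const dalai = new Dalai();

// stream tokens for a prompt against the locally installed alpaca 7B model
dalai.request({
  model: "alpaca.7B",   // <model_type>.<model_name>
  prompt: "Explain in one sentence what an alpaca is.",
  n_predict: 128,
  temp: 0.8
}, (token) => {
  process.stdout.write(token);
});
```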
|
||||
@ -388,15 +613,18 @@ http.listen(3000, () => {
|
||||
})
|
||||
```
|
||||
|
||||
## 5. install()
|
||||
## 5. get()
|
||||
|
||||
### Syntax
|
||||
|
||||
```javascript
|
||||
await dalai.install(model1, model2, ...)
|
||||
await dalai.get(model_type, model_name1, model_name2, ...)
|
||||
```
|
||||
|
||||
- `models`: the model names to install ("7B"`, "13B", "30B", "65B", etc)
|
||||
- `model_type`: the name of the model. currently supports:
|
||||
- "alpaca"
|
||||
- "llama"
|
||||
- `model1`, `model2`, ...: the model names to install ("7B", "13B", "30B", "65B", etc.)
|
||||
|
||||
### Examples
|
||||
|
||||
@ -431,24 +659,57 @@ const models = await dalai.installed()
|
||||
console.log(models) // prints ["alpaca.7B", "llama.13B"]
|
||||
```
|
||||
|
||||
<!--
|
||||
|
||||
---
|
||||
|
||||
## 7. download()
|
||||
|
||||
Download models.
|
||||
|
||||
There are two download options:
|
||||
|
||||
1. **LLaMA:** Download the original LLaMA model, convert it, and quantize (compress) it
|
||||
2. **LLaMA.zip:** Download the compressed version (generated from step 1 and published on HuggingFace)
|
||||
|
||||
### Syntax
|
||||
|
||||
```javascript
|
||||
await dalai.download(model1, model2, model3, ...)
|
||||
```
|
||||
|
||||
- `models`: the model names to install. Can be: "7B"`, "13B", "30B", "65B", "7B.zip", "13B.zip", "30B.zip", "65B.zip"
|
||||
- "7B", "13B", "30B", "65B": download the raw model, convert, and quantize
|
||||
- "7B.zip", "13B.zip", "30B.zip", "65B.zip": download the quantized model (no need to waste time downloading huge files)
|
||||
|
||||
### Examples
|
||||
|
||||
Install the "7B" and "13B" models:
|
||||
|
||||
|
||||
```javascript
|
||||
const Dalai = require("dalai");
|
||||
const dalai = new Dalai()
|
||||
await dalai.install("7B", "13B")
|
||||
```
|
||||
|
||||
-->
|
||||
|
||||
---
|
||||
|
||||
|
||||
# FAQ
|
||||
|
||||
## Updating to the latest
|
||||
|
||||
Dalai is a young project and will evolve quickly.
|
||||
As of `dalai@0.3.0` the recommended way to use dalai is through `npm install -g` (not the `npx` method)
|
||||
|
||||
To update dalai, you will need to run the dalai command with a version number specified (You only need to do this once when you update).
|
||||
|
||||
For example, let's say you've been using `dalai@0.1.0` but a new version `dalai@0.2.0` came out.
|
||||
|
||||
The simplest way to update is to just run the dalai server:
|
||||
The simplest way to make sure you have the correct version is to run:
|
||||
|
||||
```
|
||||
npx dalai@0.2.0 serve
|
||||
npm install -g dalai@0.3.0
|
||||
```
|
||||
|
||||
Once you run the command it will ask you if you want to update. Confirm, and it will now install `0.2.0`, and from that point on you don't need to specify the version. You can just run `npx dalai serve` and the new version will be executed from that point on.
|
||||
|
||||
|
||||
## Staying up to date
|
||||
|
||||
|
BIN
docs/alpa.png
Normal file
Binary file not shown.
After: 56 KiB
BIN
docs/alpaca.gif
Normal file
Binary file not shown.
After: 7.9 MiB
BIN
docs/alpaca_spec.png
Normal file
Binary file not shown.
After: 278 KiB
BIN
docs/cmd.png
Normal file
Binary file not shown.
After: 120 KiB
BIN
docs/cmd2.png
Normal file
Binary file not shown.
After: 56 KiB
BIN
docs/llam.png
Normal file
Binary file not shown.
After: 54 KiB
366
index.js
@ -14,6 +14,8 @@ const semver = require('semver');
|
||||
const _7z = require('7zip-min');
|
||||
const platform = os.platform()
|
||||
const shell = platform === 'win32' ? 'powershell.exe' : 'bash';
|
||||
const L = require("./llama")
|
||||
const A = require("./alpaca")
|
||||
class Dalai {
|
||||
constructor(home) {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -30,7 +32,7 @@ class Dalai {
|
||||
// Otherwise if you want to customize the path you can just pass in the "home" attribute to manually set it.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
this.home = home ? path.resolve(home) : path.resolve(os.homedir(), "llama.cpp")
|
||||
this.home = home ? path.resolve(home) : path.resolve(os.homedir(), "dalai")
|
||||
try {
|
||||
fs.mkdirSync(this.home, { recursive: true })
|
||||
} catch (e) { }
|
||||
@ -39,87 +41,10 @@ class Dalai {
|
||||
cols: 200,
|
||||
rows: 30,
|
||||
}
|
||||
}
|
||||
async download(model) {
|
||||
console.log(`Download model ${model}`)
|
||||
const num = {
|
||||
"7B": 1,
|
||||
"13B": 2,
|
||||
"30B": 4,
|
||||
"65B": 8,
|
||||
this.cores = {
|
||||
llama: new L(this),
|
||||
alpaca: new A(this),
|
||||
}
|
||||
const files = ["checklist.chk", "params.json"]
|
||||
for(let i=0; i<num[model]; i++) {
|
||||
files.push(`consolidated.0${i}.pth`)
|
||||
}
|
||||
const resolvedPath = path.resolve(this.home, "models", model)
|
||||
await fs.promises.mkdir(resolvedPath, { recursive: true }).catch((e) => { })
|
||||
|
||||
for(let file of files) {
|
||||
if (fs.existsSync(path.resolve(resolvedPath, file))) {
|
||||
console.log(`Skip file download, it already exists: ${file}`)
|
||||
continue;
|
||||
}
|
||||
|
||||
const task = `downloading ${file}`
|
||||
const downloader = new Downloader({
|
||||
url: `https://agi.gpt4.org/llama/LLaMA/${model}/${file}`,
|
||||
directory: path.resolve(this.home, "models", model),
|
||||
onProgress: (percentage, chunk, remainingSize) => {
|
||||
this.progress(task, percentage)
|
||||
},
|
||||
});
|
||||
try {
|
||||
await this.startProgress(task)
|
||||
await downloader.download();
|
||||
} catch (error) {
|
||||
console.log(error);
|
||||
}
|
||||
this.progressBar.update(1);
|
||||
term("\n")
|
||||
}
|
||||
|
||||
const files2 = ["tokenizer_checklist.chk", "tokenizer.model"]
|
||||
for(let file of files2) {
|
||||
if (fs.existsSync(path.resolve(this.home, "models", file))) {
|
||||
console.log(`Skip file download, it already exists: ${file}`)
|
||||
continue;
|
||||
}
|
||||
const task = `downloading ${file}`
|
||||
const downloader = new Downloader({
|
||||
url: `https://agi.gpt4.org/llama/LLaMA/${file}`,
|
||||
directory: path.resolve(this.home, "models"),
|
||||
onProgress: (percentage, chunk, remainingSize) => {
|
||||
this.progress(task, percentage)
|
||||
},
|
||||
});
|
||||
try {
|
||||
await this.startProgress(task)
|
||||
await downloader.download();
|
||||
} catch (error) {
|
||||
console.log(error);
|
||||
}
|
||||
this.progressBar.update(1);
|
||||
term("\n")
|
||||
}
|
||||
|
||||
}
|
||||
async installed() {
|
||||
const modelsPath = path.resolve(this.home, "models")
|
||||
console.log("modelsPath", modelsPath)
|
||||
const modelFolders = (await fs.promises.readdir(modelsPath, { withFileTypes: true }))
|
||||
.filter(dirent => dirent.isDirectory())
|
||||
.map(dirent => dirent.name)
|
||||
|
||||
console.log({ modelFolders })
|
||||
const modelNames = []
|
||||
for(let modelFolder of modelFolders) {
|
||||
if (fs.existsSync(path.resolve(modelsPath, modelFolder, 'ggml-model-q4_0.bin'))) {
|
||||
modelNames.push(modelFolder)
|
||||
console.log("exists", modelFolder)
|
||||
}
|
||||
}
|
||||
return modelNames
|
||||
}
|
||||
async python () {
|
||||
// install self-contained python => only for windows for now
|
||||
@ -179,29 +104,160 @@ class Dalai {
|
||||
console.log("cleaning up temp files")
|
||||
await fs.promises.rm(path.resolve(this.home, "x86_64-12.2.0-release-win32-seh-msvcrt-rt_v10-rev2.7z"))
|
||||
}
|
||||
async install(...models) {
|
||||
async query(req, cb) {
|
||||
|
||||
console.log(`> query:`, req)
|
||||
if (req.method === "installed") {
|
||||
let models = await this.installed()
|
||||
for(let model of models) {
|
||||
cb(model)
|
||||
}
|
||||
cb('\n\n<end>')
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
const [Core, Model] = req.model.split(".")
|
||||
|
||||
console.log( { Core, Model } )
|
||||
|
||||
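// generation defaults (request fields fall back to these); the model path is relative to the engine home where the binary runs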
let o = {
|
||||
seed: req.seed || -1,
|
||||
threads: req.threads || 8,
|
||||
n_predict: req.n_predict || 128,
|
||||
model: `models/${Model || "7B"}/ggml-model-q4_0.bin`,
|
||||
}
|
||||
|
||||
if (!fs.existsSync(path.resolve(this.home, Core, "models", Model))) {
|
||||
cb(`File does not exist: ${Model}. Try "dalai ${Core} get ${Model}" first.`)
|
||||
return
|
||||
}
|
||||
|
||||
if (req.top_k) o.top_k = req.top_k
|
||||
if (req.top_p) o.top_p = req.top_p
|
||||
if (req.temp) o.temp = req.temp
|
||||
if (req.batch_size) o.batch_size = req.batch_size
|
||||
if (req.repeat_last_n) o.repeat_last_n = req.repeat_last_n
|
||||
if (req.repeat_penalty) o.repeat_penalty = req.repeat_penalty
|
||||
if (typeof req.interactive !== "undefined") o.interactive = req.interactive
|
||||
|
||||
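// assemble the command line for the chat/main binary: one --key value flag per option, then the -p prompt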
let chunks = []
|
||||
for(let key in o) {
|
||||
chunks.push(`--${key} ${o[key]}`)
|
||||
}
|
||||
chunks.push(`-p "${req.prompt}"`)
|
||||
|
||||
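// compiled binary lives under <core>/build/Release/ on Windows and at the <core> root on linux/mac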
const main_bin_path = platform === "win32" ? path.resolve(this.home, Core, "build", "Release", this.cores[Core].launcher[platform]) : path.resolve(this.home, Core, this.cores[Core].launcher[platform])
|
||||
if (req.full) {
|
||||
await this.exec(`${main_bin_path} ${chunks.join(" ")}`, this.cores[Core].home, cb)
|
||||
} else {
|
||||
const startpattern = /.*sampling parameters:.*/g
|
||||
const endpattern = /.*mem per token.*/g
|
||||
let started = req.debug
|
||||
let ended = false
|
||||
let writeEnd = !req.skip_end
|
||||
await this.exec(`${main_bin_path} ${chunks.join(" ")}`, this.cores[Core].home, (msg) => {
|
||||
if (endpattern.test(msg)) ended = true
|
||||
if (started && !ended) {
|
||||
cb(msg)
|
||||
} else if (ended && writeEnd) {
|
||||
cb('\n\n<end>')
|
||||
writeEnd = false
|
||||
}
|
||||
if (startpattern.test(msg)) started = true
|
||||
})
|
||||
}
|
||||
}
|
||||
async get(core, ...models) {
|
||||
let res = await this.cores[core].get(...models)
|
||||
return res
|
||||
}
|
||||
async installed() {
|
||||
// get cores
|
||||
const modelNames = []
|
||||
for(let core of ["alpaca", "llama"]) {
|
||||
const modelsPath = path.resolve(this.home, core, "models")
|
||||
console.log("modelsPath", modelsPath)
|
||||
let modelFolders = []
|
||||
try {
|
||||
modelFolders = (await fs.promises.readdir(modelsPath, { withFileTypes: true }))
|
||||
.filter(dirent => dirent.isDirectory())
|
||||
.map(dirent => dirent.name)
|
||||
} catch (e) {
|
||||
}
|
||||
|
||||
console.log({ modelFolders })
|
||||
for(let modelFolder of modelFolders) {
|
||||
if (fs.existsSync(path.resolve(modelsPath, modelFolder, 'ggml-model-q4_0.bin'))) {
|
||||
modelNames.push(`${core}.${modelFolder}`)
|
||||
console.log("exists", modelFolder)
|
||||
}
|
||||
}
|
||||
}
|
||||
return modelNames
|
||||
}
|
||||
async install (core) {
|
||||
/**************************************************************************************************************
|
||||
*
|
||||
* 2. Download Core
|
||||
*
|
||||
**************************************************************************************************************/
|
||||
let engine = this.cores[core]
|
||||
try {
|
||||
if (fs.existsSync(path.resolve(engine.home))) {
|
||||
console.log("try fetching", engine.home, engine.url)
|
||||
await git.fetch({ fs, http, dir: engine.home, url: engine.url })
|
||||
} else {
|
||||
console.log("try cloning", engine.home, engine.url)
|
||||
await git.clone({ fs, http, dir: engine.home, url: engine.url })
|
||||
}
|
||||
} catch (e) {
|
||||
console.log("ERROR", e)
|
||||
}
|
||||
/**************************************************************************************************************
|
||||
*
|
||||
* 4. Compile & Build
|
||||
* - make: linux + mac
|
||||
* - cmake: windows
|
||||
*
|
||||
**************************************************************************************************************/
|
||||
await this.cores[core].make()
|
||||
}
|
||||
async setup() {
|
||||
|
||||
let success;
|
||||
|
||||
/**************************************************************************************************************
|
||||
*
|
||||
* 1. Validate
|
||||
*
|
||||
**************************************************************************************************************/
|
||||
// Check if current version is greater than or equal to 18
|
||||
const node_version = process.version;
|
||||
if (!semver.gte(node_version, '18.0.0')) {
|
||||
throw new Error("outdated Node version, please install Node 18 or newer")
|
||||
}
|
||||
let success;
|
||||
try {
|
||||
console.log("try cloning")
|
||||
await git.clone({ fs, http, dir: this.home, url: "https://github.com/ggerganov/llama.cpp.git" })
|
||||
} catch (e) {
|
||||
console.log("try pulling")
|
||||
await git.pull({ fs, http, dir: this.home, url: "https://github.com/ggerganov/llama.cpp.git" })
|
||||
}
|
||||
|
||||
// windows don't ship with python, so install a dedicated self-contained python
|
||||
|
||||
|
||||
/**************************************************************************************************************
|
||||
*
|
||||
* 3. Download Global Dependencies
|
||||
* - Python (windows only)
|
||||
* - build-essential (linux only)
|
||||
* - virtualenv
|
||||
* - torch, numpy, etc.
|
||||
*
|
||||
**************************************************************************************************************/
|
||||
|
||||
// 3.1. Python: Windows doesn't ship with python, so install a dedicated self-contained python
|
||||
if (platform === "win32") {
|
||||
await this.python()
|
||||
}
|
||||
const root_python_paths = (platform === "win32" ? [path.resolve(this.home, "python", "python.exe")] : ["python3", "python"])
|
||||
const root_pip_paths = (platform === "win32" ? [path.resolve(this.home, "python", "python -m pip")] : ["pip3", "pip"])
|
||||
|
||||
// prerequisites
|
||||
// 3.2. Build tools
|
||||
if (platform === "linux") {
|
||||
// ubuntu debian
|
||||
success = await this.exec("apt-get install build-essential python3-venv -y")
|
||||
@ -218,10 +274,19 @@ class Dalai {
|
||||
if (!success) {
|
||||
throw new Error("cannot install virtualenv")
|
||||
}
|
||||
}
|
||||
// create venv
|
||||
const venv_path = path.join(this.home, "venv")
|
||||
|
||||
// cmake (only on windows. the rest platforms use make)
|
||||
if (platform === "win32") {
|
||||
success = await this.exec(`${pip_path} install cmake`)
|
||||
if (!success) {
|
||||
throw new Error("cmake installation failed")
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 3.3. virtualenv
|
||||
const venv_path = path.join(this.home, "venv")
|
||||
for(let root_python_path of root_python_paths) {
|
||||
success = await this.exec(`${root_python_path} -m venv ${venv_path}`)
|
||||
if (success) break;
|
||||
@ -231,54 +296,21 @@ class Dalai {
|
||||
return
|
||||
}
|
||||
|
||||
// different venv paths for Windows
|
||||
// 3.4. Python libraries
|
||||
const pip_path = platform === "win32" ? path.join(venv_path, "Scripts", "pip.exe") : path.join(venv_path, "bin", "pip")
|
||||
const python_path = platform == "win32" ? path.join(venv_path, "Scripts", "python.exe") : path.join(venv_path, 'bin', 'python')
|
||||
|
||||
// upgrade setuptools
|
||||
success = await this.exec(`${pip_path} install --upgrade pip setuptools wheel`)
|
||||
if (!success) {
|
||||
throw new Error("pip setuptools wheel upgrade failed")
|
||||
return
|
||||
}
|
||||
|
||||
// install to ~/llama.cpp
|
||||
success = await this.exec(`${pip_path} install torch torchvision torchaudio sentencepiece numpy`)
|
||||
if (!success) {
|
||||
throw new Error("dependency installation failed")
|
||||
return
|
||||
}
|
||||
|
||||
if (platform === "win32") {
|
||||
success = await this.exec(`${pip_path} install cmake`)
|
||||
if (!success) {
|
||||
throw new Error("cmake installation failed")
|
||||
return
|
||||
}
|
||||
await this.exec("mkdir build", this.home)
|
||||
await this.exec(`Remove-Item -path ${path.resolve(this.home, "build", "CMakeCache.txt")}`, this.home)
|
||||
|
||||
const cmake_path = path.join(venv_path, "Scripts", "cmake")
|
||||
await this.exec(`${cmake_path} ..`, path.resolve(this.home, "build"))
|
||||
await this.exec(`${cmake_path} --build . --config Release`, path.resolve(this.home, "build"))
|
||||
|
||||
} else {
|
||||
success = await this.exec("make", this.home)
|
||||
if (!success) {
|
||||
throw new Error("running 'make' failed")
|
||||
return
|
||||
}
|
||||
}
|
||||
for(let model of models) {
|
||||
await this.download(model)
|
||||
const outputFile = path.resolve(this.home, 'models', model, 'ggml-model-f16.bin')
|
||||
if (fs.existsSync(outputFile)) {
|
||||
console.log(`Skip conversion, file already exists: ${outputFile}`)
|
||||
} else {
|
||||
await this.exec(`${python_path} convert-pth-to-ggml.py models/${model}/ 1`, this.home)
|
||||
}
|
||||
await this.quantize(model)
|
||||
}
|
||||
}
|
||||
serve(port) {
|
||||
const httpServer = createServer();
|
||||
@ -309,63 +341,6 @@ class Dalai {
|
||||
await this.query(req, cb)
|
||||
}
|
||||
}
|
||||
async query(req, cb) {
|
||||
console.log(`> query:`, req)
|
||||
if (req.method === "installed") {
|
||||
let models = await this.installed()
|
||||
for(let model of models) {
|
||||
cb(model)
|
||||
}
|
||||
cb('\n\n<end>')
|
||||
return
|
||||
}
|
||||
|
||||
let o = {
|
||||
seed: req.seed || -1,
|
||||
threads: req.threads || 8,
|
||||
n_predict: req.n_predict || 128,
|
||||
model: `models/${req.model || "7B"}/ggml-model-q4_0.bin`
|
||||
}
|
||||
|
||||
if (!fs.existsSync(path.resolve(this.home, o.model))) {
|
||||
cb(`File does not exist: ${o.model}. Try "dalai llama ${req.model}" first.`)
|
||||
return
|
||||
}
|
||||
|
||||
if (req.top_k) o.top_k = req.top_k
|
||||
if (req.top_p) o.top_p = req.top_p
|
||||
if (req.temp) o.temp = req.temp
|
||||
if (req.batch_size) o.batch_size = req.batch_size
|
||||
if (req.repeat_last_n) o.repeat_last_n = req.repeat_last_n
|
||||
if (req.repeat_penalty) o.repeat_penalty = req.repeat_penalty
|
||||
|
||||
let chunks = []
|
||||
for(let key in o) {
|
||||
chunks.push(`--${key} ${o[key]}`)
|
||||
}
|
||||
chunks.push(`-p "${req.prompt}"`)
|
||||
|
||||
const main_bin_path = platform === "win32" ? path.resolve(this.home, "build", "Release", "llama") : path.resolve(this.home, "main")
|
||||
if (req.full) {
|
||||
await this.exec(`${main_bin_path} ${chunks.join(" ")}`, this.home, cb)
|
||||
} else {
|
||||
const startpattern = /.*sampling parameters:.*/g
|
||||
const endpattern = /.*mem per token.*/g
|
||||
let started = false
|
||||
let ended = false
|
||||
let writeEnd = !req.skip_end
|
||||
await this.exec(`${main_bin_path} ${chunks.join(" ")}`, this.home, (msg) => {
|
||||
if (endpattern.test(msg)) ended = true
|
||||
if (started && !ended) {
|
||||
cb(msg)
|
||||
} else if (ended && writeEnd) {
|
||||
cb('\n\n<end>')
|
||||
writeEnd = false
|
||||
}
|
||||
if (startpattern.test(msg)) started = true
|
||||
})
|
||||
}
|
||||
}
|
||||
connect(req, cb) {
|
||||
const socket = io(req.url)
|
||||
socket.emit('request', req)
|
||||
@ -402,25 +377,6 @@ class Dalai {
|
||||
ptyProcess.write("exit\r")
|
||||
})
|
||||
}
|
||||
async quantize(model) {
|
||||
let num = {
|
||||
"7B": 1,
|
||||
"13B": 2,
|
||||
"30B": 4,
|
||||
"65B": 8,
|
||||
}
|
||||
for(let i=0; i<num[model]; i++) {
|
||||
const suffix = (i === 0 ? "" : `.${i}`)
|
||||
const outputFile1 = path.resolve(this.home, `./models/${model}/ggml-model-f16.bin${suffix}`)
|
||||
const outputFile2 = path.resolve(this.home, `./models/${model}/ggml-model-q4_0.bin${suffix}`)
|
||||
if (fs.existsSync(outputFile1) && fs.existsSync(outputFile2)) {
|
||||
console.log(`Skip quantization, files already exists: ${outputFile1} and ${outputFile2}}`)
|
||||
continue
|
||||
}
|
||||
const bin_path = platform === "win32" ? path.resolve(this.home, "build", "Release") : this.home
|
||||
await this.exec(`./quantize ${outputFile1} ${outputFile2} 2`, bin_path)
|
||||
}
|
||||
}
|
||||
progress(task, percent) {
|
||||
this.progressBar.update(percent/100);
|
||||
//if (percent >= 100) {
|
||||
|
172
llama.js
Normal file
@ -0,0 +1,172 @@
|
||||
const path = require('path');
|
||||
const term = require( 'terminal-kit' ).terminal;
|
||||
const git = require('isomorphic-git');
|
||||
const Downloader = require("nodejs-file-downloader");
|
||||
const http = require('isomorphic-git/http/node');
|
||||
const os = require('os');
|
||||
const fs = require("fs");
|
||||
const platform = os.platform()
|
||||
class LLaMA {
|
||||
constructor(root) {
|
||||
this.root = root
|
||||
this.home = path.resolve(this.root.home, "llama")
|
||||
this.url = "https://github.com/ggerganov/llama.cpp.git"
|
||||
this.launcher = {
|
||||
win32: "llama",
|
||||
linux: "main",
|
||||
darwin: "main"
|
||||
}
|
||||
}
|
||||
async make() {
|
||||
let success
|
||||
if (platform === "win32") {
|
||||
// CMake on Windows
|
||||
const venv_path = path.join(this.root.home, "venv")
|
||||
const cmake_path = path.join(venv_path, "Scripts", "cmake")
|
||||
await this.root.exec("mkdir build", this.home)
|
||||
await this.root.exec(`Remove-Item -path ${path.resolve(this.home, "build", "CMakeCache.txt")}`, this.home)
|
||||
await this.root.exec(`${cmake_path} ..`, path.resolve(this.home, "build"))
|
||||
await this.root.exec(`${cmake_path} --build . --config Release`, path.resolve(this.home, "build"))
|
||||
} else {
|
||||
// Make on linux + mac
|
||||
success = await this.root.exec(`make`, this.home)
|
||||
if (!success) {
|
||||
throw new Error("running 'make' failed")
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
async get (...models) {
|
||||
if (models.length === 0) models = ["7B"]
|
||||
for(let model of models) {
|
||||
if (!["7B", "13B", "30B", "65B"].includes(model)) {
|
||||
console.log(`##########################################################
|
||||
#
|
||||
# ERROR
|
||||
# The arguments must be one or more of the following:
|
||||
#
|
||||
# 7B, 13B, 30B, 65B
|
||||
#
|
||||
##########################################################
|
||||
|
||||
[Example]
|
||||
|
||||
# install just 7B (default)
|
||||
npx dalai install
|
||||
|
||||
# install 7B manually
|
||||
npx dalai install 7B
|
||||
|
||||
# install 7B and 13B
|
||||
npx dalai install 7B 13B
|
||||
`)
|
||||
throw new Error("The model name must be one of: 7B, 13B, 30B, and 65B")
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
const venv_path = path.join(this.root.home, "venv")
|
||||
const python_path = platform == "win32" ? path.join(venv_path, "Scripts", "python.exe") : path.join(venv_path, 'bin', 'python')
|
||||
/**************************************************************************************************************
|
||||
*
|
||||
* 5. Download models + convert + quantize
|
||||
*
|
||||
**************************************************************************************************************/
|
||||
for(let model of models) {
|
||||
await this.download(model)
|
||||
const outputFile = path.resolve(this.home, 'models', model, 'ggml-model-f16.bin')
|
||||
if (fs.existsSync(outputFile)) {
|
||||
console.log(`Skip conversion, file already exists: ${outputFile}`)
|
||||
} else {
|
||||
await this.root.exec(`${python_path} convert-pth-to-ggml.py models/${model}/ 1`, this.home)
|
||||
}
|
||||
await this.quantize(model)
|
||||
}
|
||||
}
|
||||
|
||||
async quantize(model) {
|
||||
let num = {
|
||||
"7B": 1,
|
||||
"13B": 2,
|
||||
"30B": 4,
|
||||
"65B": 8,
|
||||
}
|
||||
for(let i=0; i<num[model]; i++) {
|
||||
const suffix = (i === 0 ? "" : `.${i}`)
|
||||
const outputFile1 = path.resolve(this.home, `./models/${model}/ggml-model-f16.bin${suffix}`)
|
||||
const outputFile2 = path.resolve(this.home, `./models/${model}/ggml-model-q4_0.bin${suffix}`)
|
||||
if (fs.existsSync(outputFile1) && fs.existsSync(outputFile2)) {
|
||||
console.log(`Skip quantization, files already exist: ${outputFile1} and ${outputFile2}`)
|
||||
continue
|
||||
}
|
||||
const bin_path = platform === "win32" ? path.resolve(this.home, "build", "Release") : this.home
|
||||
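// the trailing 2 selects the 4-bit q4_0 quantization type (matching the ggml-model-q4_0.bin output name)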
await this.root.exec(`./quantize ${outputFile1} ${outputFile2} 2`, bin_path)
|
||||
}
|
||||
}
|
||||
async download(model) {
|
||||
console.log(`Download model ${model}`)
|
||||
const num = {
|
||||
"7B": 1,
|
||||
"13B": 2,
|
||||
"30B": 4,
|
||||
"65B": 8,
|
||||
}
|
||||
const files = ["checklist.chk", "params.json"]
|
||||
for(let i=0; i<num[model]; i++) {
|
||||
files.push(`consolidated.0${i}.pth`)
|
||||
}
|
||||
const resolvedPath = path.resolve(this.home, "models", model)
|
||||
await fs.promises.mkdir(resolvedPath, { recursive: true }).catch((e) => { })
|
||||
|
||||
for(let file of files) {
|
||||
if (fs.existsSync(path.resolve(resolvedPath, file))) {
|
||||
console.log(`Skip file download, it already exists: ${file}`)
|
||||
continue;
|
||||
}
|
||||
|
||||
const task = `downloading ${file}`
|
||||
const downloader = new Downloader({
|
||||
url: `https://agi.gpt4.org/llama/LLaMA/${model}/${file}`,
|
||||
directory: path.resolve(this.home, "models", model),
|
||||
onProgress: (percentage, chunk, remainingSize) => {
|
||||
this.root.progress(task, percentage)
|
||||
},
|
||||
});
|
||||
try {
|
||||
await this.root.startProgress(task)
|
||||
await downloader.download();
|
||||
} catch (error) {
|
||||
console.log(error);
|
||||
}
|
||||
this.root.progressBar.update(1);
|
||||
term("\n")
|
||||
}
|
||||
|
||||
const files2 = ["tokenizer_checklist.chk", "tokenizer.model"]
|
||||
for(let file of files2) {
|
||||
if (fs.existsSync(path.resolve(this.home, "models", file))) {
|
||||
console.log(`Skip file download, it already exists: ${file}`)
|
||||
continue;
|
||||
}
|
||||
const task = `downloading ${file}`
|
||||
const downloader = new Downloader({
|
||||
url: `https://agi.gpt4.org/llama/LLaMA/${file}`,
|
||||
directory: path.resolve(this.home, "models"),
|
||||
onProgress: (percentage, chunk, remainingSize) => {
|
||||
this.root.progress(task, percentage)
|
||||
},
|
||||
});
|
||||
try {
|
||||
await this.root.startProgress(task)
|
||||
await downloader.download();
|
||||
} catch (error) {
|
||||
console.log(error);
|
||||
}
|
||||
this.root.progressBar.update(1);
|
||||
term("\n")
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
module.exports = LLaMA
|
1
package-lock.json
generated
@ -7,6 +7,7 @@
|
||||
"": {
|
||||
"name": "dalai",
|
||||
"version": "0.2.0",
|
||||
"hasInstallScript": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"7zip-min": "^1.4.4",
|
||||
|
@ -9,7 +9,8 @@
|
||||
"dalai:llama": "./dalai llama",
|
||||
"start": "./dalai serve",
|
||||
"just:run": "wrap () { yarn && yarn dalai:llama $1 && yarn start; }; wrap",
|
||||
"just:fix": "npx prettier --write ."
|
||||
"just:fix": "npx prettier --write .",
|
||||
"postinstall": "node ./bin/cli setup"
|
||||
},
|
||||
"dependencies": {
|
||||
"7zip-min": "^1.4.4",
|
||||
|