
dalai alpaca

commit a4cbf1c73c (parent 607f17af29)
cocktailpeanut, 2023-03-16 18:46:41 -04:00
14 changed files with 797 additions and 307 deletions

alpaca.js  (new file, 80 lines)

@ -0,0 +1,80 @@
const path = require('path');
const term = require( 'terminal-kit' ).terminal;
const git = require('isomorphic-git');
const Downloader = require("nodejs-file-downloader");
const http = require('isomorphic-git/http/node');
const os = require('os');
const fs = require("fs");
const platform = os.platform()
class Alpaca {
constructor(root) {
this.root = root
this.home = path.resolve(this.root.home, "alpaca")
this.url = "https://github.com/cocktailpeanut/alpaca.cpp.git"
this.launcher = {
win32: "chat",
linux: "chat",
darwin: "chat",
}
}
async make() {
let success
if (platform === "win32") {
// CMake on Windows
const venv_path = path.join(this.root.home, "venv")
const cmake_path = path.join(venv_path, "Scripts", "cmake")
await this.root.exec("mkdir build", this.home)
await this.root.exec(`Remove-Item -path ${path.resolve(this.home, "build", "CMakeCache.txt")}`, this.home)
await this.root.exec(`make chat`, this.home)
} else {
// Make on linux + mac
success = await this.root.exec(`make chat`, this.home)
if (!success) {
throw new Error("running 'make' failed")
return
}
}
}
async get (...models) {
for(let model of models) {
const venv_path = path.join(this.root.home, "venv")
const python_path = platform == "win32" ? path.join(venv_path, "Scripts", "python.exe") : path.join(venv_path, 'bin', 'python')
/**************************************************************************************************************
*
* 5. Download models + convert + quantize
*
**************************************************************************************************************/
const outputFile = path.resolve(this.home, 'models', model, 'ggml-model-q4_0.bin')
if (fs.existsSync(outputFile)) {
console.log(`Skip conversion, file already exists: ${outputFile}`)
} else {
const task = `downloading ${outputFile}`
const downloader = new Downloader({
url: "https://gateway.estuary.tech/gw/ipfs/QmQ1bf2BTnYxq73MFJWu1B7bQ2UD6qG7D7YDCxhTndVkPC",
//url: "https://ipfs.io/ipfs/QmQ1bf2BTnYxq73MFJWu1B7bQ2UD6qG7D7YDCxhTndVkPC",
//url: `https://cloudflare-ipfs.com/ipfs/QmQ1bf2BTnYxq73MFJWu1B7bQ2UD6qG7D7YDCxhTndVkPC`,
fileName: 'ggml-model-q4_0.bin',
directory: path.resolve(this.home, "models", model),
maxAttempts: 3, //Default is 1.
onError: function (error) {
//You can also hook into each failed attempt.
console.log("Error from attempt ", error);
},
onProgress: (percentage, chunk, remainingSize) => {
this.root.progress(task, percentage)
},
});
try {
await this.root.startProgress(task)
await downloader.download();
} catch (error) {
console.log(error);
}
this.root.progressBar.update(1);
term("\n")
}
}
}
}
module.exports = Alpaca

@ -6,37 +6,42 @@ if (process.argv.length > 0) {
if (cmd === "serve") {
const port = (args.length > 0 ? parseInt(args[0]) : 3000)
Web(port)
} else if (cmd === "llama" || cmd === "install") {
if (args.length === 0) args = ["7B"]
for(let arg of args) {
if (!["7B", "13B", "30B", "65B"].includes(arg)) {
console.log(`##########################################################
#
# ERROR
# The arguments must be one or more of the following:
#
# 7B, 13B, 30B, 65B
#
##########################################################
[Example]
# install just 7B (default)
npx dalai install
# install 7B manually
npx dalai install 7B
# install 7B and 13B
npx dalai install 7B 13B
`)
process.exit(1)
break;
}
}
new Dalai().install(...args).then(() => {
} else if (cmd === "setup") {
new Dalai().setup().then(() => {
process.exit(0)
}).catch((e) => {
console.log("Error", e)
process.exit(1)
})
} else {
if (args.length > 0) {
let core = cmd
let [method, ...callparams] = args
let dalai = new Dalai()
console.log({ method, callparams })
// 1. install => install the core module
// 2. get => get models
dalai[method](core, ...callparams).then(() => {
process.exit(0)
}).catch((e) => {
console.log("ERROR", e)
process.exit(1)
})
} else {
console.log("############################################")
console.log("#")
console.log("# Supported Commands:")
console.log("#")
console.log("# 1. System command")
console.log("#")
console.log("# dalai serve <port (optional)>")
console.log("#")
console.log("# 2. Model command")
console.log("#")
console.log("# dalai llama get <model names>")
console.log("#")
console.log("############################################")
}
}
} else {
console.log("ERROR: Please pass a command")

@ -59,11 +59,11 @@
flex-grow: 1;
}
input[type=text] , select {
margin-right: 5px;
border: none;
background: rgba(0,0,0,0.08);
padding: 5px 10px;
box-sizing: border-box;
width: 100px;
}
.logo {
font-weight: bold;
@ -83,6 +83,7 @@
.kv {
display: block;
font-size: 14px;
margin-left: 10px;
}
.kv label {
display: block;
@ -111,13 +112,13 @@
const config = {
seed: -1,
threads: 4,
n_predict: 1000,
model: "7B",
n_predict: 200,
top_k: 40,
top_p: 0.9,
temp: 0.8,
repeat_last_n: 64,
repeat_penalty: 1.3,
debug: false,
models: []
}
const socket = io();
@ -125,17 +126,27 @@ const form = document.getElementById('form');
const input = document.querySelector('#input');
const model = document.querySelector('#model');
const renderHeader = (config) => {
const fields = ["n_predict", "repeat_last_n", "repeat_penalty", "top_k", "top_p", "temp", "seed"].map((key) => {
return `<div class='kv'>
const fields = [{ key: "debug", type: "checkbox" }, "n_predict", "repeat_last_n", "repeat_penalty", "top_k", "top_p", "temp", "seed"].map((key) => {
if (typeof key === "string") {
return `<div class='kv'>
<label>${key}</label>
<input name="${key}" type='text' placeholder="${key}" value="${config[key] || ''}">
</div>`
} else {
if (key.type === "checkbox") {
return `<div class='kv'>
<label>${key.key}</label>
<input name="${key.key}" type='checkbox' ${config[key.key] ? "checked" : ""}>
</div>`
}
}
}).join("")
const models = config.models.map((model) => {
return `<option value="7B" ${config.model === model ? "selected" : ""}>${model}</option>`
config.model = config.models[0]
const models = config.models.map((model, i) => {
return `<option value="${model}" ${i === 0 ? "selected" : ""}>${model}</option>`
}).join("")
return `<a class='logo' href="/">Dalai</a><div class='stretch'></div>
<div class='config-container'>
@ -158,6 +169,8 @@ const loading = (on) => {
document.querySelector("form").addEventListener("input", (e) => {
if (e.target.tagName === "SELECT") {
config[e.target.name] = config.models[e.target.selectedIndex]
} else if (e.target.type === "checkbox") {
config[e.target.name] = e.target.checked
} else {
config[e.target.name] = e.target.value
}
@ -203,6 +216,7 @@ socket.emit('request', {
})
socket.on('result', async ({ request, response }) => {
loading(false)
console.log(response)
if (request.method === "installed") {
if (response == "\n\n<end>") {
document.querySelector(".form-header").innerHTML = renderHeader(config)

@ -1,6 +1,6 @@
# Dalai
Run LLaMA on your computer.
Run LLaMA and Alpaca on your computer.
<a href="https://github.com/cocktailpeanut/dalai" class='inverse btn'><i class="fa-brands fa-github"></i> Github</a>
<a href="https://twitter.com/cocktailpeanut" class='inverse btn'><i class="fa-brands fa-twitter"></i> Twitter</a>
@ -8,17 +8,23 @@ Run LLaMA on your computer.
---
#### JUST RUN THIS:
## JUST RUN THIS
<img src="terminal.png" class='round'>
<img src="alpa.png" class='round'>
#### TO GET:
or
![dalai.gif](dalai.gif)
<img src="llam.png" class='round'>
## TO GET
Both Alpaca and LLaMA running on your computer!
![alpaca.gif](alpaca.gif)
---
1. Powered by [llama.cpp](https://github.com/ggerganov/llama.cpp) and [llama-dl CDN](https://github.com/shawwn/llama-dl)
1. Powered by [llama.cpp](https://github.com/ggerganov/llama.cpp), [llama-dl CDN](https://github.com/shawwn/llama-dl), and [alpaca.cpp](https://github.com/antimatter15/alpaca.cpp)
2. Hackable web app included
3. Ships with JavaScript API
4. Ships with [Socket.io](https://socket.io/) API
@ -41,6 +47,19 @@ Runs on most modern computers. Unless your computer is very very old, it should
## 3. Disk Space Requirements
### Alpaca
Currently only the 7B model is available via [alpaca.cpp](https://github.com/antimatter15/alpaca.cpp)
#### 7B
Alpaca comes fully quantized (compressed), and the only space you need for the 7B model is 4.21GB:
![alpaca_spec.png](alpaca_spec.png)
### LLaMA
You need a lot of space for storing the models.
You do NOT have to install all models; you can install them one by one. Let's take a look at how much space each model takes up:
@ -51,28 +70,28 @@ You do NOT have to install all models, you can install one by one. Let's take a
>
> You can optimize this if you delete the original models (which are much larger) after installation and keep only the quantized versions.
### 7B
#### 7B
- Full: The model takes up 31.17GB
- Quantized: 4.21GB
![7b.png](7b.png)
### 13B
#### 13B
- Full: The model takes up 60.21GB
- Quantized: 4.07GB * 2 = 8.14GB
![13b.png](13b.png)
### 30B
#### 30B
- Full: The model takes up 150.48GB
- Quantized: 5.09GB * 4 = 20.36GB
![30b.png](30b.png)
### 65B
#### 65B
- Full: The model takes up 432.64GB
- Quantized: 5.11GB * 8 = 40.88GB
@ -91,28 +110,67 @@ You do NOT have to install all models, you can install one by one. Let's take a
### Step 2. Install Dalai
Basic install (7B model only)
First install dalai:
```
npx dalai llama
npm install -g dalai
```
Or, install all models
### Step 3. Install Engines
Currently supported engines are `llama` and `alpaca`.
#### Install LLaMA
To install `llama`, run:
```
npx dalai llama 7B 13B 30B 65B
dalai llama install
```
The install command:
#### Install Alpaca
To install `alpaca`, run:
```
dalai alpaca install
```
### Step 4. Get Models
#### Download LLaMA models
To download llama models, you can run:
```
dalai llama get 7B
```
or to download multiple models:
```
dalai llama get 7B 13B
```
#### Download Alpaca models
Currently alpaca only has the 7B model:
```
dalai alpaca get 7B
```
1. Creates a folder named `dalai` under your home directory (`~`)
2. Installs and builds the [llama.cpp](https://github.com/ggerganov/llama.cpp) project under `~/llama.cpp`
3. Downloads all the requested models from the [llama-dl CDN](https://github.com/shawwn/llama-dl) to `~/llama.cpp/models`
4. Runs some tasks to convert the LLaMA models so they can be used
### Step 3. Run Web UI
After everything has been installed, open http://localhost:3000 in your browser. Have fun!
After everything has been installed, run the following command to launch the web UI server:
```
dalai serve
```
and open http://localhost:3000 in your browser. Have fun!
---
@ -126,8 +184,6 @@ Press the button below to visit the Visual Studio downloads page and download:
<a href="https://visualstudio.microsoft.com/downloads/" class='btn'>Download Microsoft Visual Studio</a>
---
**IMPORTANT!!!**
When installing Visual Studio, make sure to check the 3 options as highlighted below:
@ -138,32 +194,22 @@ When installing Visual Studio, make sure to check the 3 options as highlighted b
![vs.png](vs.png)
---
### Step 2.1. Install Dalai
Basic install (7B model only)
First install dalai:
```
npx dalai llama
npm install -g dalai
```
Or, install all models
```
npx dalai llama 7B 13B 30B 65B
```
The install command:
1. Creates a folder named `dalai` under your home directory (`~`)
2. Installs and builds the [llama.cpp](https://github.com/ggerganov/llama.cpp) project under `~/llama.cpp`
3. Downloads all the requested models from the [llama-dl CDN](https://github.com/shawwn/llama-dl) to `~/llama.cpp/models`
4. Runs some tasks to convert the LLaMA models so they can be used
If this worked without any errors, go to step 3.
Otherwise, try the troubleshooting steps below:
---
### Step 2.2. Troubleshoot (optional)
In case the above steps fail, try installing node.js and python separately.
@ -181,44 +227,222 @@ After both have been installed, open powershell and type `python` to see if the
Once you've checked that they both exist, try the `npx dalai llama` command again.
### Step 3. Run Web UI
---
### Step 3. Install Engines
Currently supported engines are `llama` and `alpaca`.
#### Install LLaMA
To install `llama`, run:
```
dalai llama install
```
#### Install Alpaca
To install `alpaca`, run:
```
dalai alpaca install
```
---
### Step 4. Get Models
#### Download LLaMA models
To download llama models, you can run:
```
dalai llama get 7B
```
or to download multiple models:
```
dalai llama get 7B 13B
```
#### Download Alpaca models
Currently alpaca only has the 7B model:
```
dalai alpaca get 7B
```
---
### Step 5. Run Web UI
After everything has been installed, run the following command to launch the web UI server:
```
dalai serve
```
and open http://localhost:3000 in your browser. Have fun!
After everything has been installed, open http://localhost:3000 in your browser. Have fun!
---
## Linux
### Step 1. Install
### Step 1. Install Dependencies
After everything has been installed, open http://localhost:3000 in your browser. Have fun!
You need to make sure you have the correct versions of Python and Node.js installed.
Basic install (7B model only)
#### Step 1.1. Python <= 3.10
<a href="https://pimylifeup.com/installing-python-on-linux/" class='btn'>Download node.js</a>
> Make sure the version is 3.10 or lower (not 3.11)
Python must be 3.10 or below (PyTorch and some other libraries do not yet support the latest Python versions)
#### Step 1.2. Node.js >= 18
<a href="https://nodejs.org/en/download/package-manager/" class='btn'>Download node.js</a>
> Make sure the version is 18 or higher
### Step 2. Install Dalai
First install dalai:
```
npx dalai llama
npm install -g dalai
```
Or, install all models
### Step 3. Install Engines
Currently supported engines are `llama` and `alpaca`.
#### Install LLaMA
To install `llama`, run:
```
npx dalai llama 7B 13B 30B 65B
dalai llama install
```
The install command:
#### Install Alpaca
1. Creates a folder named `dalai` under your home directory (`~`)
2. Installs and builds the [llama.cpp](https://github.com/ggerganov/llama.cpp) project under `~/llama.cpp`
3. Downloads all the requested models from the [llama-dl CDN](https://github.com/shawwn/llama-dl) to `~/llama.cpp/models`
4. Runs some tasks to convert the LLaMA models so they can be used
To install `alpaca`, run:
### Step 2. Run Web UI
```
dalai alpaca install
```
### Step 4. Get Models
#### Download LLaMA models
To download llama models, you can run:
```
dalai llama get 7B
```
or to download multiple models:
```
dalai llama get 7B 13B
```
#### Download Alpaca models
Currently alpaca only has the 7B model:
```
dalai alpaca get 7B
```
### Step 3. Run Web UI
After everything has been installed, run the following command to launch the web UI server:
```
dalai serve
```
and open http://localhost:3000 in your browser. Have fun!
After everything has been installed, open http://localhost:3000 in your browser. Have fun!
---
# Commands
## 1. install
### LLaMA
Install the core engine for the model
```
dalai llama install
```
### Alpaca
Install the core engine for the model
```
dalai alpaca install
```
## 2. get
Download the full LLaMA models, then convert and compress them
### LLaMA
Download one model:
```
dalai llama get 7B
```
Download multiple models:
```
dalai llama get 7B 13B
```
### Alpaca
Currently only the 7B model is available:
```
dalai alpaca get 7B
```
## 3. serve
Start a dalai server and an API endpoint (powered by socket.io)
```
dalai serve
```
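For reference, here is a minimal sketch of a client talking to a running `dalai serve` instance over its socket.io endpoint. The `request`/`result` event names and the `\n\n<end>` terminator come from the web UI code in this commit; the port, prompt, and model values are placeholders.
```javascript
// sketch: stream a completion from a local `dalai serve` instance
const io = require("socket.io-client");

const socket = io("ws://localhost:3000");

socket.emit("request", {
  prompt: "What is an alpaca?",   // placeholder prompt
  model: "alpaca.7B",             // <model_type>.<model_name>
  n_predict: 128,
});

socket.on("result", ({ request, response }) => {
  if (response === "\n\n<end>") {
    socket.disconnect();          // server signals the end of the stream
  } else {
    process.stdout.write(response);
  }
});
```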
---
# API
Dalai is also an NPM package:
@ -281,7 +505,8 @@ dalai.request(req, callback)
- `req`: a request object. made up of the following attributes:
- `prompt`: **(required)** The prompt string
- `model`: **(required)** The model name to query ("7B", "13B", etc.)
- `model`: **(required)** The model type + model name to query. Takes the following form: `<model_type>.<model_name>`
- Example: `alpaca.7B`, `llama.13B`, ...
- `url`: only needed if connecting to a remote dalai server
- if unspecified, it uses the node.js API to directly run dalai locally
- if specified (for example `ws://localhost:3000`) it looks for a socket.io endpoint at the URL and connects to it.
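For example, a minimal sketch of a local request using the `<model_type>.<model_name>` format described above (the prompt and the token handling are placeholders):
```javascript
const Dalai = require("dalai");
const dalai = new Dalai()

// query the local alpaca 7B model; tokens are streamed into the callback
dalai.request({
  prompt: "Explain what a llama is in one sentence.",
  model: "alpaca.7B",   // <model_type>.<model_name>
}, (token) => {
  process.stdout.write(token)
})
```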
@ -388,15 +613,18 @@ http.listen(3000, () => {
})
```
## 5. install()
## 5. get()
### Syntax
```javascript
await dalai.install(model1, model2, ...)
await dalai.get(model_type, model_name1, model_name2, ...)
```
- `models`: the model names to install ("7B"`, "13B", "30B", "65B", etc)
- `model_type`: the type of model engine. Currently supports:
  - "alpaca"
  - "llama"
- `model_name1`, `model_name2`, ...: the model names to download ("7B", "13B", "30B", "65B", etc.)
### Examples
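A minimal sketch, assuming the signature above (engine name first, then one or more model names):
```javascript
const Dalai = require("dalai");
const dalai = new Dalai()

// download the quantized Alpaca 7B model
await dalai.get("alpaca", "7B")

// download two LLaMA models in one call
await dalai.get("llama", "7B", "13B")
```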
@ -431,24 +659,57 @@ const models = await dalai.installed()
console.log(models) // prints e.g. ["alpaca.7B", "llama.13B"]
```
<!--
---
## 7. download()
Download models.
There are two download options:
1. **LLaMA:** Download the original LLaMA model, convert it, and quantize (compress) it
2. **LLaMA.zip:** Download the compressed version (generated from step 1 and published on HuggingFace)
### Syntax
```javascript
await dalai.download(model1, model2, model3, ...)
```
- `models`: the model names to install. Can be: "7B"`, "13B", "30B", "65B", "7B.zip", "13B.zip", "30B.zip", "65B.zip"
- "7B", "13B", "30B", "65B": download the raw model, convert, and quantize
- "7B.zip", "13B.zip", "30B.zip", "65B.zip": download the quantized model (no need to waste time downloading huge files)
### Examples
Install the "7B" and "13B" models:
```javascript
const Dalai = require("dalai");
const dalai = new Dalai()
await dalai.install("7B", "13B")
```
-->
---
# FAQ
## Updating to the latest
Dalai is a young project and will evolve quickly.
As of `dalai@0.3.0` the recommended way to use dalai is through `npm install -g` (not the `npx` method).
To update dalai, you will need to run the dalai command with a version number specified (You only need to do this once when you update).
For example, let's say you've been using `dalai@0.1.0` but a new version `dalai@0.2.0` came out.
The simplest way to update is to just run the dalai server:
The simplest way to make sure you have the correct version is to run:
```
npx dalai@0.2.0 serve
npm install -g dalai@0.3.0
```
Once you run the command it will ask you if you want to update. Confirm, and it will now install `0.2.0`, and from that point on you don't need to specify the version. You can just run `npx dalai serve` and the new version will be executed from that point on.
## Staying up to date

docs/alpa.png         new binary file (56 KiB)
docs/alpaca.gif       new binary file (7.9 MiB)
docs/alpaca_spec.png  new binary file (278 KiB)
docs/cmd.png          new binary file (120 KiB)
docs/cmd2.png         new binary file (56 KiB)
docs/llam.png         new binary file (54 KiB)

index.js  (366 changed lines)

@ -14,6 +14,8 @@ const semver = require('semver');
const _7z = require('7zip-min');
const platform = os.platform()
const shell = platform === 'win32' ? 'powershell.exe' : 'bash';
const L = require("./llama")
const A = require("./alpaca")
class Dalai {
constructor(home) {
////////////////////////////////////////////////////////////////////////////////////////////////////////////
@ -30,7 +32,7 @@ class Dalai {
// Otherwise if you want to customize the path you can just pass in the "home" attribute to manually set it.
//
////////////////////////////////////////////////////////////////////////////////////////////////////////////
this.home = home ? path.resolve(home) : path.resolve(os.homedir(), "llama.cpp")
this.home = home ? path.resolve(home) : path.resolve(os.homedir(), "dalai")
try {
fs.mkdirSync(this.home, { recursive: true })
} catch (e) { }
@ -39,87 +41,10 @@ class Dalai {
cols: 200,
rows: 30,
}
}
async download(model) {
console.log(`Download model ${model}`)
const num = {
"7B": 1,
"13B": 2,
"30B": 4,
"65B": 8,
this.cores = {
llama: new L(this),
alpaca: new A(this),
}
const files = ["checklist.chk", "params.json"]
for(let i=0; i<num[model]; i++) {
files.push(`consolidated.0${i}.pth`)
}
const resolvedPath = path.resolve(this.home, "models", model)
await fs.promises.mkdir(resolvedPath, { recursive: true }).catch((e) => { })
for(let file of files) {
if (fs.existsSync(path.resolve(resolvedPath, file))) {
console.log(`Skip file download, it already exists: ${file}`)
continue;
}
const task = `downloading ${file}`
const downloader = new Downloader({
url: `https://agi.gpt4.org/llama/LLaMA/${model}/${file}`,
directory: path.resolve(this.home, "models", model),
onProgress: (percentage, chunk, remainingSize) => {
this.progress(task, percentage)
},
});
try {
await this.startProgress(task)
await downloader.download();
} catch (error) {
console.log(error);
}
this.progressBar.update(1);
term("\n")
}
const files2 = ["tokenizer_checklist.chk", "tokenizer.model"]
for(let file of files2) {
if (fs.existsSync(path.resolve(this.home, "models", file))) {
console.log(`Skip file download, it already exists: ${file}`)
continue;
}
const task = `downloading ${file}`
const downloader = new Downloader({
url: `https://agi.gpt4.org/llama/LLaMA/${file}`,
directory: path.resolve(this.home, "models"),
onProgress: (percentage, chunk, remainingSize) => {
this.progress(task, percentage)
},
});
try {
await this.startProgress(task)
await downloader.download();
} catch (error) {
console.log(error);
}
this.progressBar.update(1);
term("\n")
}
}
async installed() {
const modelsPath = path.resolve(this.home, "models")
console.log("modelsPath", modelsPath)
const modelFolders = (await fs.promises.readdir(modelsPath, { withFileTypes: true }))
.filter(dirent => dirent.isDirectory())
.map(dirent => dirent.name)
console.log({ modelFolders })
const modelNames = []
for(let modelFolder of modelFolders) {
if (fs.existsSync(path.resolve(modelsPath, modelFolder, 'ggml-model-q4_0.bin'))) {
modelNames.push(modelFolder)
console.log("exists", modelFolder)
}
}
return modelNames
}
async python () {
// install self-contained python => only for windows for now
@ -179,29 +104,160 @@ class Dalai {
console.log("cleaning up temp files")
await fs.promises.rm(path.resolve(this.home, "x86_64-12.2.0-release-win32-seh-msvcrt-rt_v10-rev2.7z"))
}
async install(...models) {
async query(req, cb) {
console.log(`> query:`, req)
if (req.method === "installed") {
let models = await this.installed()
for(let model of models) {
cb(model)
}
cb('\n\n<end>')
return
}
const [Core, Model] = req.model.split(".")
console.log( { Core, Model } )
let o = {
seed: req.seed || -1,
threads: req.threads || 8,
n_predict: req.n_predict || 128,
model: `models/${Model || "7B"}/ggml-model-q4_0.bin`,
}
if (!fs.existsSync(path.resolve(this.home, Core, "models", Model))) {
cb(`File does not exist: ${Model}. Try "dalai ${Core} get ${Model}" first.`)
return
}
if (req.top_k) o.top_k = req.top_k
if (req.top_p) o.top_p = req.top_p
if (req.temp) o.temp = req.temp
if (req.batch_size) o.batch_size = req.batch_size
if (req.repeat_last_n) o.repeat_last_n = req.repeat_last_n
if (req.repeat_penalty) o.repeat_penalty = req.repeat_penalty
if (typeof req.interactive !== "undefined") o.interactive = req.interactive
let chunks = []
for(let key in o) {
chunks.push(`--${key} ${o[key]}`)
}
chunks.push(`-p "${req.prompt}"`)
const main_bin_path = platform === "win32" ? path.resolve(this.home, Core, "build", "Release", this.cores[Core].launcher[platform]) : path.resolve(this.home, Core, this.cores[Core].launcher[platform])
if (req.full) {
await this.exec(`${main_bin_path} ${chunks.join(" ")}`, this.cores[Core].home, cb)
} else {
const startpattern = /.*sampling parameters:.*/g
const endpattern = /.*mem per token.*/g
let started = req.debug
let ended = false
let writeEnd = !req.skip_end
await this.exec(`${main_bin_path} ${chunks.join(" ")}`, this.cores[Core].home, (msg) => {
if (endpattern.test(msg)) ended = true
if (started && !ended) {
cb(msg)
} else if (ended && writeEnd) {
cb('\n\n<end>')
writeEnd = false
}
if (startpattern.test(msg)) started = true
})
}
}
async get(core, ...models) {
let res = await this.cores[core].get(...models)
return res
}
async installed() {
// get cores
const modelNames = []
for(let core of ["alpaca", "llama"]) {
const modelsPath = path.resolve(this.home, core, "models")
console.log("modelsPath", modelsPath)
let modelFolders = []
try {
modelFolders = (await fs.promises.readdir(modelsPath, { withFileTypes: true }))
.filter(dirent => dirent.isDirectory())
.map(dirent => dirent.name)
} catch (e) {
}
console.log({ modelFolders })
for(let modelFolder of modelFolders) {
if (fs.existsSync(path.resolve(modelsPath, modelFolder, 'ggml-model-q4_0.bin'))) {
modelNames.push(`${core}.${modelFolder}`)
console.log("exists", modelFolder)
}
}
}
return modelNames
}
async install (core) {
/**************************************************************************************************************
*
* 2. Download Core
*
**************************************************************************************************************/
let engine = this.cores[core]
try {
if (fs.existsSync(path.resolve(engine.home))) {
console.log("try fetching", engine.home, engine.url)
await git.fetch({ fs, http, dir: engine.home, url: engine.url })
} else {
console.log("try cloning", engine.home, engine.url)
await git.clone({ fs, http, dir: engine.home, url: engine.url })
}
} catch (e) {
console.log("ERROR", e)
}
/**************************************************************************************************************
*
* 4. Compile & Build
* - make: linux + mac
* - cmake: windows
*
**************************************************************************************************************/
await this.cores[core].make()
}
async setup() {
let success;
/**************************************************************************************************************
*
* 1. Validate
*
**************************************************************************************************************/
// Check if current version is greater than or equal to 18
const node_version = process.version;
if (!semver.gte(node_version, '18.0.0')) {
throw new Error("outdated Node version, please install Node 18 or newer")
}
let success;
try {
console.log("try cloning")
await git.clone({ fs, http, dir: this.home, url: "https://github.com/ggerganov/llama.cpp.git" })
} catch (e) {
console.log("try pulling")
await git.pull({ fs, http, dir: this.home, url: "https://github.com/ggerganov/llama.cpp.git" })
}
// windows don't ship with python, so install a dedicated self-contained python
/**************************************************************************************************************
*
* 3. Download Global Dependencies
* - Python (windows only)
* - build-essential (linux only)
* - virtualenv
* - torch, numpy, etc.
*
**************************************************************************************************************/
// 3.1. Python: Windows doesn't ship with python, so install a dedicated self-contained python
if (platform === "win32") {
await this.python()
}
const root_python_paths = (platform === "win32" ? [path.resolve(this.home, "python", "python.exe")] : ["python3", "python"])
const root_pip_paths = (platform === "win32" ? [path.resolve(this.home, "python", "python -m pip")] : ["pip3", "pip"])
// prerequisites
// 3.2. Build tools
if (platform === "linux") {
// ubuntu debian
success = await this.exec("apt-get install build-essential python3-venv -y")
@ -218,10 +274,19 @@ class Dalai {
if (!success) {
throw new Error("cannot install virtualenv")
}
}
// create venv
const venv_path = path.join(this.home, "venv")
// cmake (only on windows. the rest platforms use make)
if (platform === "win32") {
success = await this.exec(`${pip_path} install cmake`)
if (!success) {
throw new Error("cmake installation failed")
return
}
}
}
// 3.3. virtualenv
const venv_path = path.join(this.home, "venv")
for(let root_python_path of root_python_paths) {
success = await this.exec(`${root_python_path} -m venv ${venv_path}`)
if (success) break;
@ -231,54 +296,21 @@ class Dalai {
return
}
// different venv paths for Windows
// 3.4. Python libraries
const pip_path = platform === "win32" ? path.join(venv_path, "Scripts", "pip.exe") : path.join(venv_path, "bin", "pip")
const python_path = platform == "win32" ? path.join(venv_path, "Scripts", "python.exe") : path.join(venv_path, 'bin', 'python')
// upgrade setuptools
success = await this.exec(`${pip_path} install --upgrade pip setuptools wheel`)
if (!success) {
throw new Error("pip setuptools wheel upgrade failed")
return
}
// install to ~/llama.cpp
success = await this.exec(`${pip_path} install torch torchvision torchaudio sentencepiece numpy`)
if (!success) {
throw new Error("dependency installation failed")
return
}
if (platform === "win32") {
success = await this.exec(`${pip_path} install cmake`)
if (!success) {
throw new Error("cmake installation failed")
return
}
await this.exec("mkdir build", this.home)
await this.exec(`Remove-Item -path ${path.resolve(this.home, "build", "CMakeCache.txt")}`, this.home)
const cmake_path = path.join(venv_path, "Scripts", "cmake")
await this.exec(`${cmake_path} ..`, path.resolve(this.home, "build"))
await this.exec(`${cmake_path} --build . --config Release`, path.resolve(this.home, "build"))
} else {
success = await this.exec("make", this.home)
if (!success) {
throw new Error("running 'make' failed")
return
}
}
for(let model of models) {
await this.download(model)
const outputFile = path.resolve(this.home, 'models', model, 'ggml-model-f16.bin')
if (fs.existsSync(outputFile)) {
console.log(`Skip conversion, file already exists: ${outputFile}`)
} else {
await this.exec(`${python_path} convert-pth-to-ggml.py models/${model}/ 1`, this.home)
}
await this.quantize(model)
}
}
serve(port) {
const httpServer = createServer();
@ -309,63 +341,6 @@ class Dalai {
await this.query(req, cb)
}
}
async query(req, cb) {
console.log(`> query:`, req)
if (req.method === "installed") {
let models = await this.installed()
for(let model of models) {
cb(model)
}
cb('\n\n<end>')
return
}
let o = {
seed: req.seed || -1,
threads: req.threads || 8,
n_predict: req.n_predict || 128,
model: `models/${req.model || "7B"}/ggml-model-q4_0.bin`
}
if (!fs.existsSync(path.resolve(this.home, o.model))) {
cb(`File does not exist: ${o.model}. Try "dalai llama ${req.model}" first.`)
return
}
if (req.top_k) o.top_k = req.top_k
if (req.top_p) o.top_p = req.top_p
if (req.temp) o.temp = req.temp
if (req.batch_size) o.batch_size = req.batch_size
if (req.repeat_last_n) o.repeat_last_n = req.repeat_last_n
if (req.repeat_penalty) o.repeat_penalty = req.repeat_penalty
let chunks = []
for(let key in o) {
chunks.push(`--${key} ${o[key]}`)
}
chunks.push(`-p "${req.prompt}"`)
const main_bin_path = platform === "win32" ? path.resolve(this.home, "build", "Release", "llama") : path.resolve(this.home, "main")
if (req.full) {
await this.exec(`${main_bin_path} ${chunks.join(" ")}`, this.home, cb)
} else {
const startpattern = /.*sampling parameters:.*/g
const endpattern = /.*mem per token.*/g
let started = false
let ended = false
let writeEnd = !req.skip_end
await this.exec(`${main_bin_path} ${chunks.join(" ")}`, this.home, (msg) => {
if (endpattern.test(msg)) ended = true
if (started && !ended) {
cb(msg)
} else if (ended && writeEnd) {
cb('\n\n<end>')
writeEnd = false
}
if (startpattern.test(msg)) started = true
})
}
}
connect(req, cb) {
const socket = io(req.url)
socket.emit('request', req)
@ -402,25 +377,6 @@ class Dalai {
ptyProcess.write("exit\r")
})
}
async quantize(model) {
let num = {
"7B": 1,
"13B": 2,
"30B": 4,
"65B": 8,
}
for(let i=0; i<num[model]; i++) {
const suffix = (i === 0 ? "" : `.${i}`)
const outputFile1 = path.resolve(this.home, `./models/${model}/ggml-model-f16.bin${suffix}`)
const outputFile2 = path.resolve(this.home, `./models/${model}/ggml-model-q4_0.bin${suffix}`)
if (fs.existsSync(outputFile1) && fs.existsSync(outputFile2)) {
console.log(`Skip quantization, files already exists: ${outputFile1} and ${outputFile2}}`)
continue
}
const bin_path = platform === "win32" ? path.resolve(this.home, "build", "Release") : this.home
await this.exec(`./quantize ${outputFile1} ${outputFile2} 2`, bin_path)
}
}
progress(task, percent) {
this.progressBar.update(percent/100);
//if (percent >= 100) {

llama.js  (new file, 172 lines)

@ -0,0 +1,172 @@
const path = require('path');
const term = require( 'terminal-kit' ).terminal;
const git = require('isomorphic-git');
const Downloader = require("nodejs-file-downloader");
const http = require('isomorphic-git/http/node');
const os = require('os');
const fs = require("fs");
const platform = os.platform()
class LLaMA {
constructor(root) {
this.root = root
this.home = path.resolve(this.root.home, "llama")
this.url = "https://github.com/ggerganov/llama.cpp.git"
this.launcher = {
win32: "llama",
linux: "main",
darwin: "main"
}
}
async make() {
let success
if (platform === "win32") {
// CMake on Windows
const venv_path = path.join(this.root.home, "venv")
const cmake_path = path.join(venv_path, "Scripts", "cmake")
await this.root.exec("mkdir build", this.home)
await this.root.exec(`Remove-Item -path ${path.resolve(this.home, "build", "CMakeCache.txt")}`, this.home)
await this.root.exec(`${cmake_path} ..`, path.resolve(this.home, "build"))
await this.root.exec(`${cmake_path} --build . --config Release`, path.resolve(this.home, "build"))
} else {
// Make on linux + mac
success = await this.root.exec(`make`, this.home)
if (!success) {
throw new Error("running 'make' failed")
return
}
}
}
async get (...models) {
if (models.length === 0) models = ["7B"]
for(let model of models) {
if (!["7B", "13B", "30B", "65B"].includes(model)) {
console.log(`##########################################################
#
# ERROR
# The arguments must be one or more of the following:
#
# 7B, 13B, 30B, 65B
#
##########################################################
[Example]
# install just 7B (default)
npx dalai install
# install 7B manually
npx dalai install 7B
# install 7B and 13B
npx dalai install 7B 13B
`)
throw new Error("The model name must be one of: 7B, 13B, 30B, and 65B")
return
}
}
const venv_path = path.join(this.root.home, "venv")
const python_path = platform == "win32" ? path.join(venv_path, "Scripts", "python.exe") : path.join(venv_path, 'bin', 'python')
/**************************************************************************************************************
*
* 5. Download models + convert + quantize
*
**************************************************************************************************************/
for(let model of models) {
await this.download(model)
const outputFile = path.resolve(this.home, 'models', model, 'ggml-model-f16.bin')
if (fs.existsSync(outputFile)) {
console.log(`Skip conversion, file already exists: ${outputFile}`)
} else {
await this.root.exec(`${python_path} convert-pth-to-ggml.py models/${model}/ 1`, this.home)
}
await this.quantize(model)
}
}
async quantize(model) {
let num = {
"7B": 1,
"13B": 2,
"30B": 4,
"65B": 8,
}
for(let i=0; i<num[model]; i++) {
const suffix = (i === 0 ? "" : `.${i}`)
const outputFile1 = path.resolve(this.home, `./models/${model}/ggml-model-f16.bin${suffix}`)
const outputFile2 = path.resolve(this.home, `./models/${model}/ggml-model-q4_0.bin${suffix}`)
if (fs.existsSync(outputFile1) && fs.existsSync(outputFile2)) {
console.log(`Skip quantization, files already exists: ${outputFile1} and ${outputFile2}}`)
continue
}
const bin_path = platform === "win32" ? path.resolve(this.home, "build", "Release") : this.home
await this.root.exec(`./quantize ${outputFile1} ${outputFile2} 2`, bin_path)
}
}
async download(model) {
console.log(`Download model ${model}`)
const num = {
"7B": 1,
"13B": 2,
"30B": 4,
"65B": 8,
}
const files = ["checklist.chk", "params.json"]
for(let i=0; i<num[model]; i++) {
files.push(`consolidated.0${i}.pth`)
}
const resolvedPath = path.resolve(this.home, "models", model)
await fs.promises.mkdir(resolvedPath, { recursive: true }).catch((e) => { })
for(let file of files) {
if (fs.existsSync(path.resolve(resolvedPath, file))) {
console.log(`Skip file download, it already exists: ${file}`)
continue;
}
const task = `downloading ${file}`
const downloader = new Downloader({
url: `https://agi.gpt4.org/llama/LLaMA/${model}/${file}`,
directory: path.resolve(this.home, "models", model),
onProgress: (percentage, chunk, remainingSize) => {
this.root.progress(task, percentage)
},
});
try {
await this.root.startProgress(task)
await downloader.download();
} catch (error) {
console.log(error);
}
this.root.progressBar.update(1);
term("\n")
}
const files2 = ["tokenizer_checklist.chk", "tokenizer.model"]
for(let file of files2) {
if (fs.existsSync(path.resolve(this.home, "models", file))) {
console.log(`Skip file download, it already exists: ${file}`)
continue;
}
const task = `downloading ${file}`
const downloader = new Downloader({
url: `https://agi.gpt4.org/llama/LLaMA/${file}`,
directory: path.resolve(this.home, "models"),
onProgress: (percentage, chunk, remainingSize) => {
this.root.progress(task, percentage)
},
});
try {
await this.root.startProgress(task)
await downloader.download();
} catch (error) {
console.log(error);
}
this.root.progressBar.update(1);
term("\n")
}
}
}
module.exports = LLaMA

package-lock.json  (generated, 1 changed line)

@ -7,6 +7,7 @@
"": {
"name": "dalai",
"version": "0.2.0",
"hasInstallScript": true,
"license": "MIT",
"dependencies": {
"7zip-min": "^1.4.4",

@ -9,7 +9,8 @@
"dalai:llama": "./dalai llama",
"start": "./dalai serve",
"just:run": "wrap () { yarn && yarn dalai:llama $1 && yarn start; }; wrap",
"just:fix": "npx prettier --write ."
"just:fix": "npx prettier --write .",
"postinstall": "node ./bin/cli setup"
},
"dependencies": {
"7zip-min": "^1.4.4",