
dalai alpaca

commit a4cbf1c73c (parent 607f17af29)
cocktailpeanut, 2023-03-16 18:46:41 -04:00
14 changed files with 797 additions and 307 deletions

alpaca.js  (new file, 80 lines)

@ -0,0 +1,80 @@
const path = require('path');
const term = require( 'terminal-kit' ).terminal;
const git = require('isomorphic-git');
const Downloader = require("nodejs-file-downloader");
const http = require('isomorphic-git/http/node');
const os = require('os');
const fs = require("fs");
const platform = os.platform()
class Alpaca {
constructor(root) {
this.root = root
this.home = path.resolve(this.root.home, "alpaca")
this.url = "https://github.com/cocktailpeanut/alpaca.cpp.git"
this.launcher = {
win32: "chat",
linux: "chat",
darwin: "chat",
}
}
async make() {
let success
if (platform === "win32") {
// CMake on Windows
const venv_path = path.join(this.root.home, "venv")
const cmake_path = path.join(venv_path, "Scripts", "cmake")
await this.root.exec("mkdir build", this.home)
await this.root.exec(`Remove-Item -path ${path.resolve(this.home, "build", "CMakeCache.txt")}`, this.home)
await this.root.exec(`make chat`, this.home)
} else {
// Make on linux + mac
success = await this.root.exec(`make chat`, this.home)
if (!success) {
throw new Error("running 'make' failed")
return
}
}
}
async get (...models) {
for(let model of models) {
const venv_path = path.join(this.root.home, "venv")
const python_path = platform == "win32" ? path.join(venv_path, "Scripts", "python.exe") : path.join(venv_path, 'bin', 'python')
/**************************************************************************************************************
*
* 5. Download models + convert + quantize
*
**************************************************************************************************************/
const outputFile = path.resolve(this.home, 'models', model, 'ggml-model-q4_0.bin')
if (fs.existsSync(outputFile)) {
console.log(`Skip conversion, file already exists: ${outputFile}`)
} else {
const task = `downloading ${outputFile}`
const downloader = new Downloader({
url: "https://gateway.estuary.tech/gw/ipfs/QmQ1bf2BTnYxq73MFJWu1B7bQ2UD6qG7D7YDCxhTndVkPC",
//url: "https://ipfs.io/ipfs/QmQ1bf2BTnYxq73MFJWu1B7bQ2UD6qG7D7YDCxhTndVkPC",
//url: `https://cloudflare-ipfs.com/ipfs/QmQ1bf2BTnYxq73MFJWu1B7bQ2UD6qG7D7YDCxhTndVkPC`,
fileName: 'ggml-model-q4_0.bin',
directory: path.resolve(this.home, "models", model),
maxAttempts: 3, //Default is 1.
onError: function (error) {
//You can also hook into each failed attempt.
console.log("Error from attempt ", error);
},
onProgress: (percentage, chunk, remainingSize) => {
this.root.progress(task, percentage)
},
});
try {
await this.root.startProgress(task)
await downloader.download();
} catch (error) {
console.log(error);
}
this.root.progressBar.update(1);
term("\n")
}
}
}
}
module.exports = Alpaca

@ -6,37 +6,42 @@ if (process.argv.length > 0) {
if (cmd === "serve") {
const port = (args.length > 0 ? parseInt(args[0]) : 3000)
Web(port)
} else if (cmd === "llama" || cmd === "install") {
if (args.length === 0) args = ["7B"]
for(let arg of args) {
if (!["7B", "13B", "30B", "65B"].includes(arg)) {
console.log(`##########################################################
#
# ERROR
# The arguments must be one or more of the following:
#
# 7B, 13B, 30B, 65B
#
##########################################################
[Example]
# install just 7B (default)
npx dalai install
# install 7B manually
npx dalai install 7B
# install 7B and 13B
npx dalai install 7B 13B
`)
process.exit(1)
break;
}
}
new Dalai().install(...args).then(() => {
} else if (cmd === "setup") {
new Dalai().setup().then(() => {
process.exit(0)
}).catch((e) => {
console.log("Error", e)
process.exit(1)
})
} else {
if (args.length > 0) {
let core = cmd
let [method, ...callparams] = args
let dalai = new Dalai()
console.log({ method, callparams })
// 1. install => install the core module
// 2. get => get models
dalai[method](core, ...callparams).then(() => {
process.exit(0)
}).catch((e) => {
console.log("ERROR", e)
process.exit(1)
})
} else {
console.log("############################################")
console.log("#")
console.log("# Supported Commands:")
console.log("#")
console.log("# 1. System command")
console.log("#")
console.log("# dalai serve <port (optional)>")
console.log("#")
console.log("# 2. Model command")
console.log("#")
console.log("# dalai llama get <model names>")
console.log("#")
console.log("############################################")
}
}
} else {
console.log("ERROR: Please pass a command")

@ -59,11 +59,11 @@
flex-grow: 1;
}
input[type=text] , select {
margin-right: 5px;
border: none;
background: rgba(0,0,0,0.08);
padding: 5px 10px;
box-sizing: border-box;
width: 100px;
}
.logo {
font-weight: bold;
@ -83,6 +83,7 @@
.kv {
display: block;
font-size: 14px;
margin-left: 10px;
}
.kv label {
display: block;
@ -111,13 +112,13 @@
const config = {
seed: -1,
threads: 4,
n_predict: 1000,
model: "7B",
n_predict: 200,
top_k: 40,
top_p: 0.9,
temp: 0.8,
repeat_last_n: 64,
repeat_penalty: 1.3,
debug: false,
models: []
}
const socket = io();
@ -125,17 +126,27 @@ const form = document.getElementById('form');
const input = document.querySelector('#input');
const model = document.querySelector('#model');
const renderHeader = (config) => {
const fields = ["n_predict", "repeat_last_n", "repeat_penalty", "top_k", "top_p", "temp", "seed"].map((key) => {
return `<div class='kv'>
const fields = [{ key: "debug", type: "checkbox" }, "n_predict", "repeat_last_n", "repeat_penalty", "top_k", "top_p", "temp", "seed"].map((key) => {
if (typeof key === "string") {
return `<div class='kv'>
<label>${key}</label>
<input name="${key}" type='text' placeholder="${key}" value="${config[key] || ''}">
</div>`
} else {
if (key.type === "checkbox") {
return `<div class='kv'>
<label>${key.key}</label>
<input name="${key.key}" type='checkbox' ${config[key.key] ? "checked" : ""}>
</div>`
}
}
}).join("")
const models = config.models.map((model) => {
return `<option value="7B" ${config.model === model ? "selected" : ""}>${model}</option>`
config.model = config.models[0]
const models = config.models.map((model, i) => {
return `<option value="${model}" ${i === 0 ? "selected" : ""}>${model}</option>`
}).join("")
return `<a class='logo' href="/">Dalai</a><div class='stretch'></div>
<div class='config-container'>
@ -158,6 +169,8 @@ const loading = (on) => {
document.querySelector("form").addEventListener("input", (e) => {
if (e.target.tagName === "SELECT") {
config[e.target.name] = config.models[e.target.selectedIndex]
} else if (e.target.type === "checkbox") {
config[e.target.name] = e.target.checked
} else {
config[e.target.name] = e.target.value
}
@ -203,6 +216,7 @@ socket.emit('request', {
})
socket.on('result', async ({ request, response }) => {
loading(false)
console.log(response)
if (request.method === "installed") {
if (response == "\n\n<end>") {
document.querySelector(".form-header").innerHTML = renderHeader(config)

@ -1,6 +1,6 @@
# Dalai
Run LLaMA on your computer.
Run LLaMA and Alpaca on your computer.
<a href="https://github.com/cocktailpeanut/dalai" class='inverse btn'><i class="fa-brands fa-github"></i> Github</a>
<a href="https://twitter.com/cocktailpeanut" class='inverse btn'><i class="fa-brands fa-twitter"></i> Twitter</a>
@ -8,17 +8,23 @@ Run LLaMA on your computer.
---
#### JUST RUN THIS:
## JUST RUN THIS
<img src="terminal.png" class='round'>
<img src="alpa.png" class='round'>
#### TO GET:
or
![dalai.gif](dalai.gif)
<img src="llam.png" class='round'>
## TO GET
Both Alpaca and LLaMA running on your computer!
![alpaca.gif](alpaca.gif)
---
1. Powered by [llama.cpp](https://github.com/ggerganov/llama.cpp) and [llama-dl CDN](https://github.com/shawwn/llama-dl)
1. Powered by [llama.cpp](https://github.com/ggerganov/llama.cpp), [llama-dl CDN](https://github.com/shawwn/llama-dl), and [alpaca.cpp](https://github.com/antimatter15/alpaca.cpp)
2. Hackable web app included
3. Ships with JavaScript API
4. Ships with [Socket.io](https://socket.io/) API
@ -41,6 +47,19 @@ Runs on most modern computers. Unless your computer is very very old, it should
## 3. Disk Space Requirements
### Alpaca
Currently only the 7B model is available via [alpaca.cpp](https://github.com/antimatter15/alpaca.cpp)
#### 7B
Alpaca comes fully quantized (compressed), and the only space you need for the 7B model is 4.21GB:
![alpaca_spec.png](alpaca_spec.png)
### LLaMA
You need a lot of space for storing the models.
You do NOT have to install all models; you can install them one by one. Let's take a look at how much space each model takes up:
@ -51,28 +70,28 @@ You do NOT have to install all models, you can install one by one. Let's take a
>
> You can optimize this if you delete the original models (which are much larger) after installation and keep only the quantized versions.
### 7B
#### 7B
- Full: The model takes up 31.17GB
- Quantized: 4.21GB
![7b.png](7b.png)
### 13B
#### 13B
- Full: The model takes up 60.21GB
- Quantized: 4.07GB * 2 = 8.14GB
![13b.png](13b.png)
### 30B
#### 30B
- Full: The model takes up 150.48GB
- Quantized: 5.09GB * 4 = 20.36GB
![30b.png](30b.png)
### 65B
#### 65B
- Full: The model takes up 432.64GB
- Quantized: 5.11GB * 8 = 40.88GB
@ -91,28 +110,67 @@ You do NOT have to install all models, you can install one by one. Let's take a
### Step 2. Install Dalai
Basic install (7B model only)
First install dalai:
```
npx dalai llama
npm install -g dalai
```
Or, install all models
### Step 3. Install Engines
Currently supported engines are `llama` and `alpaca`.
#### Install LLaMA
To install `llama`, run:
```
npx dalai llama 7B 13B 30B 65B
dalai llama install
```
The install command:
#### Install Alpaca
To install `alpaca`, run:
```
dalai alpaca install
```
### Step 4. Get Models
#### Download LLaMA models
To download llama models, you can run:
```
dalai llama get 7B
```
or to download multiple models:
```
dalai llama get 7B 13B
```
#### Download Alpaca models
Currently alpaca only has the 7B model:
```
dalai alpaca get 7B
```
1. Creates a folder named `dalai` under your home directory (`~`)
2. Installs and builds the [llama.cpp](https://github.com/ggerganov/llama.cpp) project under `~/llama.cpp`
3. Downloads all the requested models from the [llama-dl CDN](https://github.com/shawwn/llama-dl) to `~/llama.cpp/models`
4. Runs some tasks to convert the LLaMA models so they can be used
### Step 3. Run Web UI
After everything has been installed, open http://localhost:3000 in your browser. Have fun!
After everything has been installed, run the following command to launch the web UI server:
```
dalai serve
```
and open http://localhost:3000 in your browser. Have fun!
---
@ -126,8 +184,6 @@ Press the button below to visit the Visual Studio downloads page and download:
<a href="https://visualstudio.microsoft.com/downloads/" class='btn'>Download Microsoft Visual Studio</a>
---
**IMPORTANT!!!**
When installing Visual Studio, make sure to check the 3 options as highlighted below:
@ -138,32 +194,22 @@ When installing Visual Studio, make sure to check the 3 options as highlighted b
![vs.png](vs.png)
---
### Step 2.1. Install Dalai
Basic install (7B model only)
First install dalai:
```
npx dalai llama
npm install -g dalai
```
Or, install all models
```
npx dalai llama 7B 13B 30B 65B
```
The install command:
1. Creates a folder named `dalai` under your home directory (`~`)
2. Installs and builds the [llama.cpp](https://github.com/ggerganov/llama.cpp) project under `~/llama.cpp`
3. Downloads all the requested models from the [llama-dl CDN](https://github.com/shawwn/llama-dl) to `~/llama.cpp/models`
4. Runs some tasks to convert the LLaMA models so they can be used
If this worked without any errors, go to step 3.
Otherwise, try the troubleshooting steps below:
---
### Step 2.2. Troubleshoot (optional)
In case the above steps fail, try installing node.js and python separately.
@ -181,44 +227,222 @@ After both have been installed, open powershell and type `python` to see if the
Once you've checked that they both exist, try the `npx dalai llama` command again.
### Step 3. Run Web UI
---
### Step 3. Install Engines
Currently supported engines are `llama` and `alpaca`.
#### Install LLaMA
To install `llama`, run:
```
dalai llama install
```
#### Install Alpaca
To install `alpaca`, run:
```
dalai alpaca install
```
---
### Step 4. Get Models
#### Download LLaMA models
To download llama models, you can run:
```
dalai llama get 7B
```
or to download multiple models:
```
dalai llama get 7B 13B
```
#### Download Alpaca models
Currently alpaca only has the 7B model:
```
dalai alpaca get 7B
```
---
### Step 5. Run Web UI
After everything has been installed, run the following command to launch the web UI server:
```
dalai serve
```
and open http://localhost:3000 in your browser. Have fun!
After everything has been installed, open http://localhost:3000 in your browser. Have fun!
---
## Linux
### Step 1. Install
### Step 1. Install Dependencies
After everything has been installed, open http://localhost:3000 in your browser. Have fun!
You need to make sure you have the correct versions of Python and Node.js installed.
Basic install (7B model only)
#### Step 1.1. Python <= 3.10
<a href="https://pimylifeup.com/installing-python-on-linux/" class='btn'>Download node.js</a>
> Make sure the version is 3.10 or lower (not 3.11)
Python must be 3.10 or below (PyTorch and some other libraries do not yet support the latest Python versions)
#### Step 1.2. Node.js >= 18
<a href="https://nodejs.org/en/download/package-manager/" class='btn'>Download node.js</a>
> Make sure the version is 18 or higher
### Step 2. Install Dalai
First install dalai:
```
npx dalai llama
npm install -g dalai
```
Or, install all models
### Step 3. Install Engines
Currently supported engines are `llama` and `alpaca`.
#### Install LLaMA
To install `llama`, run:
```
npx dalai llama 7B 13B 30B 65B
dalai llama install
```
The install command:
#### Install Alpaca
1. Creates a folder named `dalai` under your home directory (`~`)
2. Installs and builds the [llama.cpp](https://github.com/ggerganov/llama.cpp) project under `~/llama.cpp`
3. Downloads all the requested models from the [llama-dl CDN](https://github.com/shawwn/llama-dl) to `~/llama.cpp/models`
4. Runs some tasks to convert the LLaMA models so they can be used
To install `alpaca`, run:
### Step 2. Run Web UI
```
dalai alpaca install
```
### Step 4. Get Models
#### Download LLaMA models
To download llama models, you can run:
```
dalai llama get 7B
```
or to download multiple models:
```
dalai llama get 7B 13B
```
#### Download Alpaca models
Currently alpaca only has the 7B model:
```
dalai alpaca get 7B
```
### Step 3. Run Web UI
After everything has been installed, run the following command to launch the web UI server:
```
dalai serve
```
and open http://localhost:3000 in your browser. Have fun!
After everything has been installed, open http://localhost:3000 in your browser. Have fun!
---
# Commands
## 1. install
### LLaMA
Install the core engine for the model
```
dalai llama install
```
### Alpaca
Install the core engine for the model
```
dalai alpaca install
```
## 2. get
Download the full LLaMA models, then convert and compress them
### LLaMA
Download one model:
```
dalai llama get 7B
```
Download multiple models:
```
dalai llama get 7B 13B
```
### Alpaca
Currently only the 7B model is available:
```
dalai alpaca get 7B
```
## 3. serve
Start a dalai server and an API endpoint (powered by socket.io)
```
dalai serve
```
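For reference, here is a minimal sketch of a client talking to a running `dalai serve` instance over its socket.io endpoint. The `request`/`result` event names and the `\n\n<end>` terminator come from the web UI code in this commit; the port, prompt, and model values are placeholders.
```javascript
// sketch: stream a completion from a local `dalai serve` instance
const io = require("socket.io-client");

const socket = io("ws://localhost:3000");

socket.emit("request", {
  prompt: "What is an alpaca?",   // placeholder prompt
  model: "alpaca.7B",             // <model_type>.<model_name>
  n_predict: 128,
});

socket.on("result", ({ request, response }) => {
  if (response === "\n\n<end>") {
    socket.disconnect();          // server signals the end of the stream
  } else {
    process.stdout.write(response);
  }
});
```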
---
# API
Dalai is also an NPM package:
@ -281,7 +505,8 @@ dalai.request(req, callback)
- `req`: a request object. made up of the following attributes:
- `prompt`: **(required)** The prompt string
- `model`: **(required)** The model name to query ("7B", "13B", etc.)
- `model`: **(required)** The model type + model name to query. Takes the following form: `<model_type>.<model_name>`
- Example: `alpaca.7B`, `llama.13B`, ...
- `url`: only needed if connecting to a remote dalai server
- if unspecified, it uses the node.js API to directly run dalai locally
- if specified (for example `ws://localhost:3000`) it looks for a socket.io endpoint at the URL and connects to it.
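For example, a minimal sketch of a local request using the `<model_type>.<model_name>` format described above (the prompt and the token handling are placeholders):
```javascript
const Dalai = require("dalai");
const dalai = new Dalai()

// query the local alpaca 7B model; tokens are streamed into the callback
dalai.request({
  prompt: "Explain what a llama is in one sentence.",
  model: "alpaca.7B",   // <model_type>.<model_name>
}, (token) => {
  process.stdout.write(token)
})
```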
@ -388,15 +613,18 @@ http.listen(3000, () => {
})
```
## 5. install()
## 5. get()
### Syntax
```javascript
await dalai.install(model1, model2, ...)
await dalai.get(model_type, model_name1, model_name2, ...)
```
- `models`: the model names to install ("7B"`, "13B", "30B", "65B", etc)
- `model_type`: the type of model engine. Currently supports:
  - "alpaca"
  - "llama"
- `model_name1`, `model_name2`, ...: the model names to download ("7B", "13B", "30B", "65B", etc.)
### Examples
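A minimal sketch, assuming the signature above (engine name first, then one or more model names):
```javascript
const Dalai = require("dalai");
const dalai = new Dalai()

// download the quantized Alpaca 7B model
await dalai.get("alpaca", "7B")

// download two LLaMA models in one call
await dalai.get("llama", "7B", "13B")
```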
@ -431,24 +659,57 @@ const models = await dalai.installed()
console.log(models) // prints e.g. ["alpaca.7B", "llama.13B"]
```
<!--
---
## 7. download()
Download models.
There are two download options:
1. **LLaMA:** Download the original LLaMA model, convert it, and quantize (compress) it
2. **LLaMA.zip:** Download the compressed version (generated from step 1 and published on HuggingFace)
### Syntax
```javascript
await dalai.download(model1, model2, model3, ...)
```
- `models`: the model names to install. Can be: "7B"`, "13B", "30B", "65B", "7B.zip", "13B.zip", "30B.zip", "65B.zip"
- "7B", "13B", "30B", "65B": download the raw model, convert, and quantize
- "7B.zip", "13B.zip", "30B.zip", "65B.zip": download the quantized model (no need to waste time downloading huge files)
### Examples
Install the "7B" and "13B" models:
```javascript
const Dalai = require("dalai");
const dalai = new Dalai()
await dalai.install("7B", "13B")
```
-->
---
# FAQ
## Updating to the latest
Dalai is a young project and will evolve quickly.
As of `dalai@0.3.0` the recommended way to use dalai is through `npm install -g` (not the `npx` method).
To update dalai, you will need to run the dalai command with a version number specified (You only need to do this once when you update).
For example, let's say you've been using `dalai@0.1.0` but a new version `dalai@0.2.0` came out.
The simplest way to update is to just run the dalai server:
The simplest way to make sure you have the correct version is to run:
```
npx dalai@0.2.0 serve
npm install -g dalai@0.3.0
```
Once you run the command it will ask you if you want to update. Confirm, and it will now install `0.2.0`, and from that point on you don't need to specify the version. You can just run `npx dalai serve` and the new version will be executed from that point on.
## Staying up to date

docs/alpa.png         new binary file (56 KiB)
docs/alpaca.gif       new binary file (7.9 MiB)
docs/alpaca_spec.png  new binary file (278 KiB)
docs/cmd.png          new binary file (120 KiB)
docs/cmd2.png         new binary file (56 KiB)
docs/llam.png         new binary file (54 KiB)

index.js  (366 changed lines)

@ -14,6 +14,8 @@ const semver = require('semver');
const _7z = require('7zip-min');
const platform = os.platform()
const shell = platform === 'win32' ? 'powershell.exe' : 'bash';
const L = require("./llama")
const A = require("./alpaca")
class Dalai {
constructor(home) {
////////////////////////////////////////////////////////////////////////////////////////////////////////////
@ -30,7 +32,7 @@ class Dalai {
// Otherwise if you want to customize the path you can just pass in the "home" attribute to manually set it.
//
////////////////////////////////////////////////////////////////////////////////////////////////////////////
this.home = home ? path.resolve(home) : path.resolve(os.homedir(), "llama.cpp")
this.home = home ? path.resolve(home) : path.resolve(os.homedir(), "dalai")
try {
fs.mkdirSync(this.home, { recursive: true })
} catch (e) { }
@ -39,87 +41,10 @@ class Dalai {
cols: 200,
rows: 30,
}
}
async download(model) {
console.log(`Download model ${model}`)
const num = {
"7B": 1,
"13B": 2,
"30B": 4,
"65B": 8,
this.cores = {
llama: new L(this),
alpaca: new A(this),
}
const files = ["checklist.chk", "params.json"]
for(let i=0; i<num[model]; i++) {
files.push(`consolidated.0${i}.pth`)
}
const resolvedPath = path.resolve(this.home, "models", model)
await fs.promises.mkdir(resolvedPath, { recursive: true }).catch((e) => { })
for(let file of files) {
if (fs.existsSync(path.resolve(resolvedPath, file))) {
console.log(`Skip file download, it already exists: ${file}`)
continue;
}
const task = `downloading ${file}`
const downloader = new Downloader({
url: `https://agi.gpt4.org/llama/LLaMA/${model}/${file}`,
directory: path.resolve(this.home, "models", model),
onProgress: (percentage, chunk, remainingSize) => {
this.progress(task, percentage)
},
});
try {
await this.startProgress(task)
await downloader.download();
} catch (error) {
console.log(error);
}
this.progressBar.update(1);
term("\n")
}
const files2 = ["tokenizer_checklist.chk", "tokenizer.model"]
for(let file of files2) {
if (fs.existsSync(path.resolve(this.home, "models", file))) {
console.log(`Skip file download, it already exists: ${file}`)
continue;
}
const task = `downloading ${file}`
const downloader = new Downloader({
url: `https://agi.gpt4.org/llama/LLaMA/${file}`,
directory: path.resolve(this.home, "models"),
onProgress: (percentage, chunk, remainingSize) => {
this.progress(task, percentage)
},
});
try {
await this.startProgress(task)
await downloader.download();
} catch (error) {
console.log(error);
}
this.progressBar.update(1);
term("\n")
}
}
async installed() {
const modelsPath = path.resolve(this.home, "models")
console.log("modelsPath", modelsPath)
const modelFolders = (await fs.promises.readdir(modelsPath, { withFileTypes: true }))
.filter(dirent => dirent.isDirectory())
.map(dirent => dirent.name)
console.log({ modelFolders })
const modelNames = []
for(let modelFolder of modelFolders) {
if (fs.existsSync(path.resolve(modelsPath, modelFolder, 'ggml-model-q4_0.bin'))) {
modelNames.push(modelFolder)
console.log("exists", modelFolder)
}
}
return modelNames
}
async python () {
// install self-contained python => only for windows for now
@ -179,29 +104,160 @@ class Dalai {
console.log("cleaning up temp files")
await fs.promises.rm(path.resolve(this.home, "x86_64-12.2.0-release-win32-seh-msvcrt-rt_v10-rev2.7z"))
}
async install(...models) {
async query(req, cb) {
console.log(`> query:`, req)
if (req.method === "installed") {
let models = await this.installed()
for(let model of models) {
cb(model)
}
cb('\n\n<end>')
return
}
const [Core, Model] = req.model.split(".")
console.log( { Core, Model } )
let o = {
seed: req.seed || -1,
threads: req.threads || 8,
n_predict: req.n_predict || 128,
model: `models/${Model || "7B"}/ggml-model-q4_0.bin`,
}
if (!fs.existsSync(path.resolve(this.home, Core, "models", Model))) {
cb(`File does not exist: ${Model}. Try "dalai ${Core} get ${Model}" first.`)
return
}
if (req.top_k) o.top_k = req.top_k
if (req.top_p) o.top_p = req.top_p
if (req.temp) o.temp = req.temp
if (req.batch_size) o.batch_size = req.batch_size
if (req.repeat_last_n) o.repeat_last_n = req.repeat_last_n
if (req.repeat_penalty) o.repeat_penalty = req.repeat_penalty
if (typeof req.interactive !== "undefined") o.interactive = req.interactive
let chunks = []
for(let key in o) {
chunks.push(`--${key} ${o[key]}`)
}
chunks.push(`-p "${req.prompt}"`)
const main_bin_path = platform === "win32" ? path.resolve(this.home, Core, "build", "Release", this.cores[Core].launcher[platform]) : path.resolve(this.home, Core, this.cores[Core].launcher[platform])
if (req.full) {
await this.exec(`${main_bin_path} ${chunks.join(" ")}`, this.cores[Core].home, cb)
} else {
const startpattern = /.*sampling parameters:.*/g
const endpattern = /.*mem per token.*/g
let started = req.debug
let ended = false
let writeEnd = !req.skip_end
await this.exec(`${main_bin_path} ${chunks.join(" ")}`, this.cores[Core].home, (msg) => {
if (endpattern.test(msg)) ended = true
if (started && !ended) {
cb(msg)
} else if (ended && writeEnd) {
cb('\n\n<end>')
writeEnd = false
}
if (startpattern.test(msg)) started = true
})
}
}
async get(core, ...models) {
let res = await this.cores[core].get(...models)
return res
}
async installed() {
// get cores
const modelNames = []
for(let core of ["alpaca", "llama"]) {
const modelsPath = path.resolve(this.home, core, "models")
console.log("modelsPath", modelsPath)
let modelFolders = []
try {
modelFolders = (await fs.promises.readdir(modelsPath, { withFileTypes: true }))
.filter(dirent => dirent.isDirectory())
.map(dirent => dirent.name)
} catch (e) {
}
console.log({ modelFolders })
for(let modelFolder of modelFolders) {
if (fs.existsSync(path.resolve(modelsPath, modelFolder, 'ggml-model-q4_0.bin'))) {
modelNames.push(`${core}.${modelFolder}`)
console.log("exists", modelFolder)
}
}
}
return modelNames
}
async install (core) {
/**************************************************************************************************************
*
* 2. Download Core
*
**************************************************************************************************************/
let engine = this.cores[core]
try {
if (fs.existsSync(path.resolve(engine.home))) {
console.log("try fetching", engine.home, engine.url)
await git.fetch({ fs, http, dir: engine.home, url: engine.url })
} else {
console.log("try cloning", engine.home, engine.url)
await git.clone({ fs, http, dir: engine.home, url: engine.url })
}
} catch (e) {
console.log("ERROR", e)
}
/**************************************************************************************************************
*
* 4. Compile & Build
* - make: linux + mac
* - cmake: windows
*
**************************************************************************************************************/
await this.cores[core].make()
}
async setup() {
let success;
/**************************************************************************************************************
*
* 1. Validate
*
**************************************************************************************************************/
// Check if current version is greater than or equal to 18
const node_version = process.version;
if (!semver.gte(node_version, '18.0.0')) {
throw new Error("outdated Node version, please install Node 18 or newer")
}
let success;
try {
console.log("try cloning")
await git.clone({ fs, http, dir: this.home, url: "https://github.com/ggerganov/llama.cpp.git" })
} catch (e) {
console.log("try pulling")
await git.pull({ fs, http, dir: this.home, url: "https://github.com/ggerganov/llama.cpp.git" })
}
// windows don't ship with python, so install a dedicated self-contained python
/**************************************************************************************************************
*
* 3. Download Global Dependencies
* - Python (windows only)
* - build-essential (linux only)
* - virtualenv
* - torch, numpy, etc.
*
**************************************************************************************************************/
// 3.1. Python: Windows doesn't ship with python, so install a dedicated self-contained python
if (platform === "win32") {
await this.python()
}
const root_python_paths = (platform === "win32" ? [path.resolve(this.home, "python", "python.exe")] : ["python3", "python"])
const root_pip_paths = (platform === "win32" ? [path.resolve(this.home, "python", "python -m pip")] : ["pip3", "pip"])
// prerequisites
// 3.2. Build tools
if (platform === "linux") {
// ubuntu debian
success = await this.exec("apt-get install build-essential python3-venv -y")
@ -218,10 +274,19 @@ class Dalai {
if (!success) {
throw new Error("cannot install virtualenv")
}
}
// create venv
const venv_path = path.join(this.home, "venv")
// cmake (only on windows. the rest platforms use make)
if (platform === "win32") {
success = await this.exec(`${pip_path} install cmake`)
if (!success) {
throw new Error("cmake installation failed")
return
}
}
}
// 3.3. virtualenv
const venv_path = path.join(this.home, "venv")
for(let root_python_path of root_python_paths) {
success = await this.exec(`${root_python_path} -m venv ${venv_path}`)
if (success) break;
@ -231,54 +296,21 @@ class Dalai {
return
}
// different venv paths for Windows
// 3.4. Python libraries
const pip_path = platform === "win32" ? path.join(venv_path, "Scripts", "pip.exe") : path.join(venv_path, "bin", "pip")
const python_path = platform == "win32" ? path.join(venv_path, "Scripts", "python.exe") : path.join(venv_path, 'bin', 'python')
// upgrade setuptools
success = await this.exec(`${pip_path} install --upgrade pip setuptools wheel`)
if (!success) {
throw new Error("pip setuptools wheel upgrade failed")
return
}
// install to ~/llama.cpp
success = await this.exec(`${pip_path} install torch torchvision torchaudio sentencepiece numpy`)
if (!success) {
throw new Error("dependency installation failed")
return
}
if (platform === "win32") {
success = await this.exec(`${pip_path} install cmake`)
if (!success) {
throw new Error("cmake installation failed")
return
}
await this.exec("mkdir build", this.home)
await this.exec(`Remove-Item -path ${path.resolve(this.home, "build", "CMakeCache.txt")}`, this.home)
const cmake_path = path.join(venv_path, "Scripts", "cmake")
await this.exec(`${cmake_path} ..`, path.resolve(this.home, "build"))
await this.exec(`${cmake_path} --build . --config Release`, path.resolve(this.home, "build"))
} else {
success = await this.exec("make", this.home)
if (!success) {
throw new Error("running 'make' failed")
return
}
}
for(let model of models) {
await this.download(model)
const outputFile = path.resolve(this.home, 'models', model, 'ggml-model-f16.bin')
if (fs.existsSync(outputFile)) {
console.log(`Skip conversion, file already exists: ${outputFile}`)
} else {
await this.exec(`${python_path} convert-pth-to-ggml.py models/${model}/ 1`, this.home)
}
await this.quantize(model)
}
}
serve(port) {
const httpServer = createServer();
@ -309,63 +341,6 @@ class Dalai {
await this.query(req, cb)
}
}
async query(req, cb) {
console.log(`> query:`, req)
if (req.method === "installed") {
let models = await this.installed()
for(let model of models) {
cb(model)
}
cb('\n\n<end>')
return
}
let o = {
seed: req.seed || -1,
threads: req.threads || 8,
n_predict: req.n_predict || 128,
model: `models/${req.model || "7B"}/ggml-model-q4_0.bin`
}
if (!fs.existsSync(path.resolve(this.home, o.model))) {
cb(`File does not exist: ${o.model}. Try "dalai llama ${req.model}" first.`)
return
}
if (req.top_k) o.top_k = req.top_k
if (req.top_p) o.top_p = req.top_p
if (req.temp) o.temp = req.temp
if (req.batch_size) o.batch_size = req.batch_size
if (req.repeat_last_n) o.repeat_last_n = req.repeat_last_n
if (req.repeat_penalty) o.repeat_penalty = req.repeat_penalty
let chunks = []
for(let key in o) {
chunks.push(`--${key} ${o[key]}`)
}
chunks.push(`-p "${req.prompt}"`)
const main_bin_path = platform === "win32" ? path.resolve(this.home, "build", "Release", "llama") : path.resolve(this.home, "main")
if (req.full) {
await this.exec(`${main_bin_path} ${chunks.join(" ")}`, this.home, cb)
} else {
const startpattern = /.*sampling parameters:.*/g
const endpattern = /.*mem per token.*/g
let started = false
let ended = false
let writeEnd = !req.skip_end
await this.exec(`${main_bin_path} ${chunks.join(" ")}`, this.home, (msg) => {
if (endpattern.test(msg)) ended = true
if (started && !ended) {
cb(msg)
} else if (ended && writeEnd) {
cb('\n\n<end>')
writeEnd = false
}
if (startpattern.test(msg)) started = true
})
}
}
connect(req, cb) {
const socket = io(req.url)
socket.emit('request', req)
@ -402,25 +377,6 @@ class Dalai {
ptyProcess.write("exit\r")
})
}
async quantize(model) {
let num = {
"7B": 1,
"13B": 2,
"30B": 4,
"65B": 8,
}
for(let i=0; i<num[model]; i++) {
const suffix = (i === 0 ? "" : `.${i}`)
const outputFile1 = path.resolve(this.home, `./models/${model}/ggml-model-f16.bin${suffix}`)
const outputFile2 = path.resolve(this.home, `./models/${model}/ggml-model-q4_0.bin${suffix}`)
if (fs.existsSync(outputFile1) && fs.existsSync(outputFile2)) {
console.log(`Skip quantization, files already exists: ${outputFile1} and ${outputFile2}}`)
continue
}
const bin_path = platform === "win32" ? path.resolve(this.home, "build", "Release") : this.home
await this.exec(`./quantize ${outputFile1} ${outputFile2} 2`, bin_path)
}
}
progress(task, percent) {
this.progressBar.update(percent/100);
//if (percent >= 100) {

llama.js  (new file, 172 lines)

@ -0,0 +1,172 @@
const path = require('path');
const term = require( 'terminal-kit' ).terminal;
const git = require('isomorphic-git');
const Downloader = require("nodejs-file-downloader");
const http = require('isomorphic-git/http/node');
const os = require('os');
const fs = require("fs");
const platform = os.platform()
class LLaMA {
constructor(root) {
this.root = root
this.home = path.resolve(this.root.home, "llama")
this.url = "https://github.com/ggerganov/llama.cpp.git"
this.launcher = {
win32: "llama",
linux: "main",
darwin: "main"
}
}
async make() {
let success
if (platform === "win32") {
// CMake on Windows
const venv_path = path.join(this.root.home, "venv")
const cmake_path = path.join(venv_path, "Scripts", "cmake")
await this.root.exec("mkdir build", this.home)
await this.root.exec(`Remove-Item -path ${path.resolve(this.home, "build", "CMakeCache.txt")}`, this.home)
await this.root.exec(`${cmake_path} ..`, path.resolve(this.home, "build"))
await this.root.exec(`${cmake_path} --build . --config Release`, path.resolve(this.home, "build"))
} else {
// Make on linux + mac
success = await this.root.exec(`make`, this.home)
if (!success) {
throw new Error("running 'make' failed")
return
}
}
}
async get (...models) {
if (models.length === 0) models = ["7B"]
for(let model of models) {
if (!["7B", "13B", "30B", "65B"].includes(model)) {
console.log(`##########################################################
#
# ERROR
# The arguments must be one or more of the following:
#
# 7B, 13B, 30B, 65B
#
##########################################################
[Example]
# install just 7B (default)
npx dalai install
# install 7B manually
npx dalai install 7B
# install 7B and 13B
npx dalai install 7B 13B
`)
throw new Error("The model name must be one of: 7B, 13B, 30B, and 65B")
return
}
}
const venv_path = path.join(this.root.home, "venv")
const python_path = platform == "win32" ? path.join(venv_path, "Scripts", "python.exe") : path.join(venv_path, 'bin', 'python')
/**************************************************************************************************************
*
* 5. Download models + convert + quantize
*
**************************************************************************************************************/
for(let model of models) {
await this.download(model)
const outputFile = path.resolve(this.home, 'models', model, 'ggml-model-f16.bin')
if (fs.existsSync(outputFile)) {
console.log(`Skip conversion, file already exists: ${outputFile}`)
} else {
await this.root.exec(`${python_path} convert-pth-to-ggml.py models/${model}/ 1`, this.home)
}
await this.quantize(model)
}
}
async quantize(model) {
let num = {
"7B": 1,
"13B": 2,
"30B": 4,
"65B": 8,
}
for(let i=0; i<num[model]; i++) {
const suffix = (i === 0 ? "" : `.${i}`)
const outputFile1 = path.resolve(this.home, `./models/${model}/ggml-model-f16.bin${suffix}`)
const outputFile2 = path.resolve(this.home, `./models/${model}/ggml-model-q4_0.bin${suffix}`)
if (fs.existsSync(outputFile1) && fs.existsSync(outputFile2)) {
console.log(`Skip quantization, files already exists: ${outputFile1} and ${outputFile2}}`)
continue
}
const bin_path = platform === "win32" ? path.resolve(this.home, "build", "Release") : this.home
await this.root.exec(`./quantize ${outputFile1} ${outputFile2} 2`, bin_path)
}
}
async download(model) {
console.log(`Download model ${model}`)
const num = {
"7B": 1,
"13B": 2,
"30B": 4,
"65B": 8,
}
const files = ["checklist.chk", "params.json"]
for(let i=0; i<num[model]; i++) {
files.push(`consolidated.0${i}.pth`)
}
const resolvedPath = path.resolve(this.home, "models", model)
await fs.promises.mkdir(resolvedPath, { recursive: true }).catch((e) => { })
for(let file of files) {
if (fs.existsSync(path.resolve(resolvedPath, file))) {
console.log(`Skip file download, it already exists: ${file}`)
continue;
}
const task = `downloading ${file}`
const downloader = new Downloader({
url: `https://agi.gpt4.org/llama/LLaMA/${model}/${file}`,
directory: path.resolve(this.home, "models", model),
onProgress: (percentage, chunk, remainingSize) => {
this.root.progress(task, percentage)
},
});
try {
await this.root.startProgress(task)
await downloader.download();
} catch (error) {
console.log(error);
}
this.root.progressBar.update(1);
term("\n")
}
const files2 = ["tokenizer_checklist.chk", "tokenizer.model"]
for(let file of files2) {
if (fs.existsSync(path.resolve(this.home, "models", file))) {
console.log(`Skip file download, it already exists: ${file}`)
continue;
}
const task = `downloading ${file}`
const downloader = new Downloader({
url: `https://agi.gpt4.org/llama/LLaMA/${file}`,
directory: path.resolve(this.home, "models"),
onProgress: (percentage, chunk, remainingSize) => {
this.root.progress(task, percentage)
},
});
try {
await this.root.startProgress(task)
await downloader.download();
} catch (error) {
console.log(error);
}
this.root.progressBar.update(1);
term("\n")
}
}
}
module.exports = LLaMA

package-lock.json  (generated, 1 changed line)

@ -7,6 +7,7 @@
"": {
"name": "dalai",
"version": "0.2.0",
"hasInstallScript": true,
"license": "MIT",
"dependencies": {
"7zip-min": "^1.4.4",

@ -9,7 +9,8 @@
"dalai:llama": "./dalai llama",
"start": "./dalai serve",
"just:run": "wrap () { yarn && yarn dalai:llama $1 && yarn start; }; wrap",
"just:fix": "npx prettier --write ."
"just:fix": "npx prettier --write .",
"postinstall": "node ./bin/cli setup"
},
"dependencies": {
"7zip-min": "^1.4.4",