1
mirror of https://github.com/cocktailpeanut/dalai synced 2024-11-20 23:07:32 +01:00

Downloader fix

Use a download method that doesn't waste memory (and eventually exhaust system memory).
This commit is contained in:
cocktailpeanut 2023-03-17 00:38:04 -04:00
parent dfeecdbb58
commit b3b75a08fb
6 changed files with 163 additions and 10 deletions

View File

@ -36,6 +36,7 @@ class Alpaca {
for(let model of models) {
const venv_path = path.join(this.root.home, "venv")
const python_path = platform == "win32" ? path.join(venv_path, "Scripts", "python.exe") : path.join(venv_path, 'bin', 'python')
// const wget_path = platform == "win32" ? path.join(venv_path, "Scripts", "wget") : path.join(venv_path, 'bin', 'wget')
/**************************************************************************************************************
*
* 5. Download models + convert + quantize
@ -46,6 +47,13 @@ class Alpaca {
console.log(`Skip conversion, file already exists: ${outputFile}`)
} else {
const task = `downloading ${outputFile}`
const url = "https://ipfs.io/ipfs/QmQ1bf2BTnYxq73MFJWu1B7bQ2UD6qG7D7YDCxhTndVkPC"
const dir = path.resolve(this.home, "models", model)
await fs.promises.mkdir(dir, { recursive: true }).catch((e) => { })
await this.root.down(url, path.resolve(dir, "ggml-model-q4_0.bin"))
//await this.root.exec(`${python_path} -m wget -o ggml-model-q4_0.bin ${url}`, dir)
/*
const downloader = new Downloader({
//url: "https://gateway.estuary.tech/gw/ipfs/QmQ1bf2BTnYxq73MFJWu1B7bQ2UD6qG7D7YDCxhTndVkPC",
url: "https://ipfs.io/ipfs/QmQ1bf2BTnYxq73MFJWu1B7bQ2UD6qG7D7YDCxhTndVkPC",
@ -69,6 +77,7 @@ class Alpaca {
}
this.root.progressBar.update(1);
term("\n")
*/
}
}
}

View File

@ -2,6 +2,7 @@ const os = require('os');
const pty = require('node-pty');
const git = require('isomorphic-git');
const http = require('isomorphic-git/http/node');
const Http = require("http")
const path = require('path');
const fs = require("fs");
const tar = require('tar');
@ -12,6 +13,7 @@ const term = require( 'terminal-kit' ).terminal;
const Downloader = require("nodejs-file-downloader");
const semver = require('semver');
const _7z = require('7zip-min');
const axios = require('axios')
const platform = os.platform()
const shell = platform === 'win32' ? 'powershell.exe' : 'bash';
const L = require("./llama")
@ -46,6 +48,34 @@ class Dalai {
alpaca: new A(this),
}
}
down(url, dest, headers) {
return new Promise((resolve, reject) => {
const task = path.basename(dest)
this.startProgress(task)
axios({
url,
method: 'GET',
responseType: 'stream',
maxContentLength: Infinity,
headers,
onDownloadProgress: progressEvent => {
const progress = (progressEvent.loaded / progressEvent.total) * 100;
this.progress(task, progress)
}
}).then(response => {
const writer = fs.createWriteStream(dest);
response.data.pipe(writer);
writer.on('finish', () => {
this.progressBar.update(1);
term("\n")
resolve()
});
}).catch(error => {
reject(error)
});
})
}
async python () {
// install self-contained python => only for windows for now
// 1. download
@ -302,6 +332,7 @@ class Dalai {
return
}
success = await this.exec(`${pip_path} install torch torchvision torchaudio sentencepiece numpy`)
//success = await this.exec(`${pip_path} install torch torchvision torchaudio sentencepiece numpy wget`)
if (!success) {
throw new Error("dependency installation failed")
return

View File

@ -100,6 +100,8 @@ npx dalai install 7B 13B
}
async download(model) {
console.log(`Download model ${model}`)
const venv_path = path.join(this.root.home, "venv")
const python_path = platform == "win32" ? path.join(venv_path, "Scripts", "python.exe") : path.join(venv_path, 'bin', 'python')
const num = {
"7B": 1,
"13B": 2,
@ -114,11 +116,18 @@ npx dalai install 7B 13B
await fs.promises.mkdir(resolvedPath, { recursive: true }).catch((e) => { })
for(let file of files) {
if (fs.existsSync(path.resolve(resolvedPath, file))) {
console.log(`Skip file download, it already exists: ${file}`)
continue;
}
// if (fs.existsSync(path.resolve(resolvedPath, file))) {
// console.log(`Skip file download, it already exists: ${file}`)
// continue;
// }
const url = `https://agi.gpt4.org/llama/LLaMA/${model}/${file}`
await this.root.down(url, path.resolve(resolvedPath, file), {
"User-Agent": "Mozilla/5.0"
})
// await this.root.exec(`${python_path} -m wget --user-agent="Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36" ${url}`, resolvedPath)
/*
const task = `downloading ${file}`
const downloader = new Downloader({
url: `https://agi.gpt4.org/llama/LLaMA/${model}/${file}`,
@ -135,14 +144,23 @@ npx dalai install 7B 13B
}
this.root.progressBar.update(1);
term("\n")
*/
}
const files2 = ["tokenizer_checklist.chk", "tokenizer.model"]
for(let file of files2) {
if (fs.existsSync(path.resolve(this.home, "models", file))) {
console.log(`Skip file download, it already exists: ${file}`)
continue;
}
// if (fs.existsSync(path.resolve(this.home, "models", file))) {
// console.log(`Skip file download, it already exists: ${file}`)
// continue;
// }
const url = `https://agi.gpt4.org/llama/LLaMA/${file}`
const dir = path.resolve(this.home, "models")
//await this.root.exec(`${python_path} -m wget --user-agent="Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36" ${url}`, dir)
await this.root.down(url, path.resolve(dir, file), {
"User-Agent": "Mozilla/5.0"
})
/*
const task = `downloading ${file}`
const downloader = new Downloader({
url: `https://agi.gpt4.org/llama/LLaMA/${file}`,
@ -159,6 +177,7 @@ npx dalai install 7B 13B
}
this.root.progressBar.update(1);
term("\n")
*/
}
}

53
package-lock.json generated
View File

@ -11,6 +11,7 @@
"license": "MIT",
"dependencies": {
"7zip-min": "^1.4.4",
"axios": "^1.3.4",
"ejs": "^3.1.8",
"express": "^4.18.2",
"isomorphic-git": "^1.22.0",
@ -151,6 +152,21 @@
"resolved": "https://registry.npmjs.org/async-lock/-/async-lock-1.4.0.tgz",
"integrity": "sha512-coglx5yIWuetakm3/1dsX9hxCNox22h7+V80RQOu2XUUMidtArxKoZoOtHUPuR84SycKTXzgGzAUR5hJxujyJQ=="
},
"node_modules/asynckit": {
"version": "0.4.0",
"resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
"integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q=="
},
"node_modules/axios": {
"version": "1.3.4",
"resolved": "https://registry.npmjs.org/axios/-/axios-1.3.4.tgz",
"integrity": "sha512-toYm+Bsyl6VC5wSkfkbbNB6ROv7KY93PEBBL6xyDczaIHasAiv4wPqQ/c4RjoQzipxRD2W5g21cOqQulZ7rHwQ==",
"dependencies": {
"follow-redirects": "^1.15.0",
"form-data": "^4.0.0",
"proxy-from-env": "^1.1.0"
}
},
"node_modules/balanced-match": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz",
@ -265,6 +281,17 @@
"resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz",
"integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA=="
},
"node_modules/combined-stream": {
"version": "1.0.8",
"resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
"integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
"dependencies": {
"delayed-stream": "~1.0.0"
},
"engines": {
"node": ">= 0.8"
}
},
"node_modules/concat-map": {
"version": "0.0.1",
"resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz",
@ -355,6 +382,14 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/delayed-stream": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
"integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==",
"engines": {
"node": ">=0.4.0"
}
},
"node_modules/depd": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/depd/-/depd-2.0.0.tgz",
@ -611,6 +646,19 @@
}
}
},
"node_modules/form-data": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz",
"integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==",
"dependencies": {
"asynckit": "^0.4.0",
"combined-stream": "^1.0.8",
"mime-types": "^2.1.12"
},
"engines": {
"node": ">= 6"
}
},
"node_modules/forwarded": {
"version": "0.2.0",
"resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz",
@ -1156,6 +1204,11 @@
"node": ">= 0.10"
}
},
"node_modules/proxy-from-env": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz",
"integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg=="
},
"node_modules/qs": {
"version": "6.11.0",
"resolved": "https://registry.npmjs.org/qs/-/qs-6.11.0.tgz",

View File

@ -14,6 +14,7 @@
},
"dependencies": {
"7zip-min": "^1.4.4",
"axios": "^1.3.4",
"ejs": "^3.1.8",
"express": "^4.18.2",
"isomorphic-git": "^1.22.0",

View File

@ -85,6 +85,20 @@ async@^3.2.3:
resolved "https://registry.npmjs.org/async/-/async-3.2.4.tgz"
integrity sha512-iAB+JbDEGXhyIUavoDl9WP/Jj106Kz9DEn1DPgYw5ruDn0e3Wgi3sKFm55sASdGBNOQB8F59d9qQ7deqrHA8wQ==
asynckit@^0.4.0:
version "0.4.0"
resolved "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz"
integrity sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==
axios@^1.3.4:
version "1.3.4"
resolved "https://registry.npmjs.org/axios/-/axios-1.3.4.tgz"
integrity sha512-toYm+Bsyl6VC5wSkfkbbNB6ROv7KY93PEBBL6xyDczaIHasAiv4wPqQ/c4RjoQzipxRD2W5g21cOqQulZ7rHwQ==
dependencies:
follow-redirects "^1.15.0"
form-data "^4.0.0"
proxy-from-env "^1.1.0"
balanced-match@^1.0.0:
version "1.0.2"
resolved "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz"
@ -176,6 +190,13 @@ color-name@~1.1.4:
resolved "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz"
integrity sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==
combined-stream@^1.0.8:
version "1.0.8"
resolved "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz"
integrity sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==
dependencies:
delayed-stream "~1.0.0"
concat-map@0.0.1:
version "0.0.1"
resolved "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz"
@ -263,6 +284,11 @@ decompress-response@^6.0.0:
dependencies:
mimic-response "^3.1.0"
delayed-stream@~1.0.0:
version "1.0.0"
resolved "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz"
integrity sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==
depd@2.0.0:
version "2.0.0"
resolved "https://registry.npmjs.org/depd/-/depd-2.0.0.tgz"
@ -394,11 +420,20 @@ finalhandler@1.2.0:
statuses "2.0.1"
unpipe "~1.0.0"
follow-redirects@^1.15.1:
follow-redirects@^1.15.0, follow-redirects@^1.15.1:
version "1.15.2"
resolved "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.2.tgz"
integrity sha512-VQLG33o04KaQ8uYi2tVNbdrWp1QWxNNea+nmIB4EVM28v0hmP17z7aG1+wAkNzVq4KeXTq3221ye5qTJP91JwA==
form-data@^4.0.0:
version "4.0.0"
resolved "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz"
integrity sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==
dependencies:
asynckit "^0.4.0"
combined-stream "^1.0.8"
mime-types "^2.1.12"
forwarded@0.2.0:
version "0.2.0"
resolved "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz"
@ -562,7 +597,7 @@ mime-db@1.52.0:
resolved "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz"
integrity sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==
mime-types@^2.1.27, mime-types@~2.1.24, mime-types@~2.1.34:
mime-types@^2.1.12, mime-types@^2.1.27, mime-types@~2.1.24, mime-types@~2.1.34:
version "2.1.35"
resolved "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz"
integrity sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==
@ -765,6 +800,11 @@ proxy-addr@~2.0.7:
forwarded "0.2.0"
ipaddr.js "1.9.1"
proxy-from-env@^1.1.0:
version "1.1.0"
resolved "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz"
integrity sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==
qs@6.11.0:
version "6.11.0"
resolved "https://registry.npmjs.org/qs/-/qs-6.11.0.tgz"