From b3b75a08fbf1eb54c0d431d0a951093507c9caef Mon Sep 17 00:00:00 2001 From: cocktailpeanut Date: Fri, 17 Mar 2023 00:38:04 -0400 Subject: [PATCH] Downloader fix use a method that doesn't waste memory (and eventually run out of system memory) --- alpaca.js | 9 ++++++++ index.js | 31 +++++++++++++++++++++++++++ llama.js | 35 ++++++++++++++++++++++++------- package-lock.json | 53 +++++++++++++++++++++++++++++++++++++++++++++++ package.json | 1 + yarn.lock | 44 +++++++++++++++++++++++++++++++++++++-- 6 files changed, 163 insertions(+), 10 deletions(-) diff --git a/alpaca.js b/alpaca.js index caea6a2..72939e7 100644 --- a/alpaca.js +++ b/alpaca.js @@ -36,6 +36,7 @@ class Alpaca { for(let model of models) { const venv_path = path.join(this.root.home, "venv") const python_path = platform == "win32" ? path.join(venv_path, "Scripts", "python.exe") : path.join(venv_path, 'bin', 'python') +// const wget_path = platform == "win32" ? path.join(venv_path, "Scripts", "wget") : path.join(venv_path, 'bin', 'wget') /************************************************************************************************************** * * 5. Download models + convert + quantize @@ -46,6 +47,13 @@ class Alpaca { console.log(`Skip conversion, file already exists: ${outputFile}`) } else { const task = `downloading ${outputFile}` + const url = "https://ipfs.io/ipfs/QmQ1bf2BTnYxq73MFJWu1B7bQ2UD6qG7D7YDCxhTndVkPC" + const dir = path.resolve(this.home, "models", model) + await fs.promises.mkdir(dir, { recursive: true }).catch((e) => { }) + + await this.root.down(url, path.resolve(dir, "ggml-model-q4_0.bin")) + //await this.root.exec(`${python_path} -m wget -o ggml-model-q4_0.bin ${url}`, dir) + /* const downloader = new Downloader({ //url: "https://gateway.estuary.tech/gw/ipfs/QmQ1bf2BTnYxq73MFJWu1B7bQ2UD6qG7D7YDCxhTndVkPC", url: "https://ipfs.io/ipfs/QmQ1bf2BTnYxq73MFJWu1B7bQ2UD6qG7D7YDCxhTndVkPC", @@ -69,6 +77,7 @@ class Alpaca { } this.root.progressBar.update(1); term("\n") + */ } } } diff --git a/index.js b/index.js index 14d1add..744f221 100644 --- a/index.js +++ b/index.js @@ -2,6 +2,7 @@ const os = require('os'); const pty = require('node-pty'); const git = require('isomorphic-git'); const http = require('isomorphic-git/http/node'); +const Http = require("http") const path = require('path'); const fs = require("fs"); const tar = require('tar'); @@ -12,6 +13,7 @@ const term = require( 'terminal-kit' ).terminal; const Downloader = require("nodejs-file-downloader"); const semver = require('semver'); const _7z = require('7zip-min'); +const axios = require('axios') const platform = os.platform() const shell = platform === 'win32' ? 'powershell.exe' : 'bash'; const L = require("./llama") @@ -46,6 +48,34 @@ class Dalai { alpaca: new A(this), } } + down(url, dest, headers) { + return new Promise((resolve, reject) => { + const task = path.basename(dest) + this.startProgress(task) + axios({ + url, + method: 'GET', + responseType: 'stream', + maxContentLength: Infinity, + headers, + onDownloadProgress: progressEvent => { + const progress = (progressEvent.loaded / progressEvent.total) * 100; + this.progress(task, progress) + } + + }).then(response => { + const writer = fs.createWriteStream(dest); + response.data.pipe(writer); + writer.on('finish', () => { + this.progressBar.update(1); + term("\n") + resolve() + }); + }).catch(error => { + reject(error) + }); + }) + } async python () { // install self-contained python => only for windows for now // 1. download @@ -302,6 +332,7 @@ class Dalai { return } success = await this.exec(`${pip_path} install torch torchvision torchaudio sentencepiece numpy`) + //success = await this.exec(`${pip_path} install torch torchvision torchaudio sentencepiece numpy wget`) if (!success) { throw new Error("dependency installation failed") return diff --git a/llama.js b/llama.js index 36840fc..410a2f6 100644 --- a/llama.js +++ b/llama.js @@ -100,6 +100,8 @@ npx dalai install 7B 13B } async download(model) { console.log(`Download model ${model}`) + const venv_path = path.join(this.root.home, "venv") + const python_path = platform == "win32" ? path.join(venv_path, "Scripts", "python.exe") : path.join(venv_path, 'bin', 'python') const num = { "7B": 1, "13B": 2, @@ -114,11 +116,18 @@ npx dalai install 7B 13B await fs.promises.mkdir(resolvedPath, { recursive: true }).catch((e) => { }) for(let file of files) { - if (fs.existsSync(path.resolve(resolvedPath, file))) { - console.log(`Skip file download, it already exists: ${file}`) - continue; - } +// if (fs.existsSync(path.resolve(resolvedPath, file))) { +// console.log(`Skip file download, it already exists: ${file}`) +// continue; +// } + const url = `https://agi.gpt4.org/llama/LLaMA/${model}/${file}` + await this.root.down(url, path.resolve(resolvedPath, file), { + "User-Agent": "Mozilla/5.0" + }) +// await this.root.exec(`${python_path} -m wget --user-agent="Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36" ${url}`, resolvedPath) + +/* const task = `downloading ${file}` const downloader = new Downloader({ url: `https://agi.gpt4.org/llama/LLaMA/${model}/${file}`, @@ -135,14 +144,23 @@ npx dalai install 7B 13B } this.root.progressBar.update(1); term("\n") + */ } const files2 = ["tokenizer_checklist.chk", "tokenizer.model"] for(let file of files2) { - if (fs.existsSync(path.resolve(this.home, "models", file))) { - console.log(`Skip file download, it already exists: ${file}`) - continue; - } +// if (fs.existsSync(path.resolve(this.home, "models", file))) { +// console.log(`Skip file download, it already exists: ${file}`) +// continue; +// } + const url = `https://agi.gpt4.org/llama/LLaMA/${file}` + const dir = path.resolve(this.home, "models") + //await this.root.exec(`${python_path} -m wget --user-agent="Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36" ${url}`, dir) + + await this.root.down(url, path.resolve(dir, file), { + "User-Agent": "Mozilla/5.0" + }) + /* const task = `downloading ${file}` const downloader = new Downloader({ url: `https://agi.gpt4.org/llama/LLaMA/${file}`, @@ -159,6 +177,7 @@ npx dalai install 7B 13B } this.root.progressBar.update(1); term("\n") + */ } } diff --git a/package-lock.json b/package-lock.json index cb9c06c..f1dcff5 100644 --- a/package-lock.json +++ b/package-lock.json @@ -11,6 +11,7 @@ "license": "MIT", "dependencies": { "7zip-min": "^1.4.4", + "axios": "^1.3.4", "ejs": "^3.1.8", "express": "^4.18.2", "isomorphic-git": "^1.22.0", @@ -151,6 +152,21 @@ "resolved": "https://registry.npmjs.org/async-lock/-/async-lock-1.4.0.tgz", "integrity": "sha512-coglx5yIWuetakm3/1dsX9hxCNox22h7+V80RQOu2XUUMidtArxKoZoOtHUPuR84SycKTXzgGzAUR5hJxujyJQ==" }, + "node_modules/asynckit": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", + "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==" + }, + "node_modules/axios": { + "version": "1.3.4", + "resolved": "https://registry.npmjs.org/axios/-/axios-1.3.4.tgz", + "integrity": "sha512-toYm+Bsyl6VC5wSkfkbbNB6ROv7KY93PEBBL6xyDczaIHasAiv4wPqQ/c4RjoQzipxRD2W5g21cOqQulZ7rHwQ==", + "dependencies": { + "follow-redirects": "^1.15.0", + "form-data": "^4.0.0", + "proxy-from-env": "^1.1.0" + } + }, "node_modules/balanced-match": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", @@ -265,6 +281,17 @@ "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==" }, + "node_modules/combined-stream": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", + "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", + "dependencies": { + "delayed-stream": "~1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, "node_modules/concat-map": { "version": "0.0.1", "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", @@ -355,6 +382,14 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/delayed-stream": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", + "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", + "engines": { + "node": ">=0.4.0" + } + }, "node_modules/depd": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/depd/-/depd-2.0.0.tgz", @@ -611,6 +646,19 @@ } } }, + "node_modules/form-data": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz", + "integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==", + "dependencies": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.8", + "mime-types": "^2.1.12" + }, + "engines": { + "node": ">= 6" + } + }, "node_modules/forwarded": { "version": "0.2.0", "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz", @@ -1156,6 +1204,11 @@ "node": ">= 0.10" } }, + "node_modules/proxy-from-env": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", + "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==" + }, "node_modules/qs": { "version": "6.11.0", "resolved": "https://registry.npmjs.org/qs/-/qs-6.11.0.tgz", diff --git a/package.json b/package.json index 079b8aa..817c190 100644 --- a/package.json +++ b/package.json @@ -14,6 +14,7 @@ }, "dependencies": { "7zip-min": "^1.4.4", + "axios": "^1.3.4", "ejs": "^3.1.8", "express": "^4.18.2", "isomorphic-git": "^1.22.0", diff --git a/yarn.lock b/yarn.lock index 2d7e660..efac33f 100644 --- a/yarn.lock +++ b/yarn.lock @@ -85,6 +85,20 @@ async@^3.2.3: resolved "https://registry.npmjs.org/async/-/async-3.2.4.tgz" integrity sha512-iAB+JbDEGXhyIUavoDl9WP/Jj106Kz9DEn1DPgYw5ruDn0e3Wgi3sKFm55sASdGBNOQB8F59d9qQ7deqrHA8wQ== +asynckit@^0.4.0: + version "0.4.0" + resolved "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz" + integrity sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q== + +axios@^1.3.4: + version "1.3.4" + resolved "https://registry.npmjs.org/axios/-/axios-1.3.4.tgz" + integrity sha512-toYm+Bsyl6VC5wSkfkbbNB6ROv7KY93PEBBL6xyDczaIHasAiv4wPqQ/c4RjoQzipxRD2W5g21cOqQulZ7rHwQ== + dependencies: + follow-redirects "^1.15.0" + form-data "^4.0.0" + proxy-from-env "^1.1.0" + balanced-match@^1.0.0: version "1.0.2" resolved "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz" @@ -176,6 +190,13 @@ color-name@~1.1.4: resolved "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz" integrity sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA== +combined-stream@^1.0.8: + version "1.0.8" + resolved "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz" + integrity sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg== + dependencies: + delayed-stream "~1.0.0" + concat-map@0.0.1: version "0.0.1" resolved "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz" @@ -263,6 +284,11 @@ decompress-response@^6.0.0: dependencies: mimic-response "^3.1.0" +delayed-stream@~1.0.0: + version "1.0.0" + resolved "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz" + integrity sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ== + depd@2.0.0: version "2.0.0" resolved "https://registry.npmjs.org/depd/-/depd-2.0.0.tgz" @@ -394,11 +420,20 @@ finalhandler@1.2.0: statuses "2.0.1" unpipe "~1.0.0" -follow-redirects@^1.15.1: +follow-redirects@^1.15.0, follow-redirects@^1.15.1: version "1.15.2" resolved "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.2.tgz" integrity sha512-VQLG33o04KaQ8uYi2tVNbdrWp1QWxNNea+nmIB4EVM28v0hmP17z7aG1+wAkNzVq4KeXTq3221ye5qTJP91JwA== +form-data@^4.0.0: + version "4.0.0" + resolved "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz" + integrity sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww== + dependencies: + asynckit "^0.4.0" + combined-stream "^1.0.8" + mime-types "^2.1.12" + forwarded@0.2.0: version "0.2.0" resolved "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz" @@ -562,7 +597,7 @@ mime-db@1.52.0: resolved "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz" integrity sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg== -mime-types@^2.1.27, mime-types@~2.1.24, mime-types@~2.1.34: +mime-types@^2.1.12, mime-types@^2.1.27, mime-types@~2.1.24, mime-types@~2.1.34: version "2.1.35" resolved "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz" integrity sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw== @@ -765,6 +800,11 @@ proxy-addr@~2.0.7: forwarded "0.2.0" ipaddr.js "1.9.1" +proxy-from-env@^1.1.0: + version "1.1.0" + resolved "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz" + integrity sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg== + qs@6.11.0: version "6.11.0" resolved "https://registry.npmjs.org/qs/-/qs-6.11.0.tgz"