From 90f7ac37da99eb48bc7dadc7e19a4ceb38740052 Mon Sep 17 00:00:00 2001 From: Nixinova Date: Thu, 20 Feb 2025 23:16:39 +1300 Subject: [PATCH 01/15] Modernise module specification Breaking change! - Bump minimum Node version to v20 - Target ES2023 - Switch to ESModule output - Upgrade all dependencies --- .github/workflows/test.yml | 2 +- package-lock.json | 181 +++++------------------------ package.json | 19 +-- src/cli.ts | 8 +- src/helpers/load-data.ts | 4 +- src/helpers/norm-path.ts | 2 +- src/helpers/parse-gitattributes.ts | 4 +- src/helpers/read-file.ts | 2 +- src/helpers/walk-tree.ts | 8 +- src/index.ts | 26 ++--- src/schema.ts | 2 +- test/folder.js | 13 ++- test/perf.js | 2 +- test/unit.js | 2 +- tsconfig.json | 8 +- 15 files changed, 83 insertions(+), 200 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2b56031..5a277e6 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -12,7 +12,7 @@ jobs: strategy: matrix: - node-version: [23.x, 22.x, 20.x, 18.x, 16.x, 14.x, 12.x] + node-version: [23.x, 22.x, 21.x, 20.x] steps: - uses: actions/checkout@v2 diff --git a/package-lock.json b/package-lock.json index a4a2d46..d083edc 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,20 +1,20 @@ { "name": "linguist-js", - "version": "2.9.0", - "lockfileVersion": 2, + "version": "3.0.0-dev", + "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "linguist-js", - "version": "2.9.0", + "version": "3.0.0-dev", "license": "ISC", "dependencies": { - "binary-extensions": "^2.3.0 <3", - "commander": "^9.5.0 <10", + "binary-extensions": "^3.0.0", + "commander": "^13.1.0", "common-path-prefix": "^3.0.0", - "cross-fetch": "^3.2.0 <4", + "cross-fetch": "^4.1.0", "ignore": "^7.0.3", - "isbinaryfile": "^4.0.10 <5", + "isbinaryfile": "^5.0.4", "js-yaml": "^4.1.0", "node-cache": "^5.1.2" }, @@ -24,13 +24,13 @@ }, "devDependencies": { "@types/js-yaml": "^4.0.9", - "@types/node": "ts5.0", + "@types/node": 
"ts5.7", "deep-object-diff": "^1.1.9", - "typescript": "~5.0.4 <5.1" + "typescript": "~5.7.3" }, "engines": { - "node": ">=12", - "npm": "<9" + "node": ">=20", + "npm": ">=10" } }, "node_modules/@types/js-yaml": { @@ -54,11 +54,11 @@ "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==" }, "node_modules/binary-extensions": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.3.0.tgz", - "integrity": "sha512-Ceh+7ox5qe7LJuLHoY0feh3pHuUDHAcRUeyL2VYghZwfpkNIy/+8Ocg0a3UuSoYzavmylwuLWQOf3hl0jjMMIw==", + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-3.0.0.tgz", + "integrity": "sha512-X0RfwMgXPEesg6PCXzytQZt9Unh9gtc4SfeTNJvKifUL//Oegcc/Yf31z6hThNZ8dnD3Ir3wkHVN0eWrTvP5ww==", "engines": { - "node": ">=8" + "node": ">=18.20" }, "funding": { "url": "https://github.com/sponsors/sindresorhus" @@ -73,11 +73,11 @@ } }, "node_modules/commander": { - "version": "9.5.0", - "resolved": "https://registry.npmjs.org/commander/-/commander-9.5.0.tgz", - "integrity": "sha512-KRs7WVDKg86PWiuAqhDrAQnTXZKraVcCc6vFdL14qrZ/DcWwuRo7VoiYXalXO7S5GKpqYiVEwCbgFDfxNHKJBQ==", + "version": "13.1.0", + "resolved": "https://registry.npmjs.org/commander/-/commander-13.1.0.tgz", + "integrity": "sha512-/rFeCpNJQbhSZjGVwO9RFV3xPqbnERS8MmIQzCtD/zl6gpJuV/bMLuN92oG3F7d8oDEHHRrujSXNUr8fpjntKw==", "engines": { - "node": "^12.20.0 || >=14" + "node": ">=18" } }, "node_modules/common-path-prefix": { @@ -86,9 +86,9 @@ "integrity": "sha512-QE33hToZseCH3jS0qN96O/bSh3kaw/h+Tq7ngyY9eWDUnTlTNUyqfqvCXioLe5Na5jFsL78ra/wuBU4iuEgd4w==" }, "node_modules/cross-fetch": { - "version": "3.2.0", - "resolved": "https://registry.npmjs.org/cross-fetch/-/cross-fetch-3.2.0.tgz", - "integrity": "sha512-Q+xVJLoGOeIMXZmbUK4HYk+69cQH6LudR0Vu/pRm2YlU/hDV9CiS0gKUMaWY5f2NeUH9C1nV3bsTlCo0FsTV1Q==", + "version": "4.1.0", + "resolved": 
"https://registry.npmjs.org/cross-fetch/-/cross-fetch-4.1.0.tgz", + "integrity": "sha512-uKm5PU+MHTootlWEY+mZ4vvXoCn4fLQxT9dSc1sXVMSFkINTJVN8cAQROpwcKm8bJ/c7rgZVIBWzH5T78sNZZw==", "dependencies": { "node-fetch": "^2.7.0" } @@ -108,11 +108,11 @@ } }, "node_modules/isbinaryfile": { - "version": "4.0.10", - "resolved": "https://registry.npmjs.org/isbinaryfile/-/isbinaryfile-4.0.10.tgz", - "integrity": "sha512-iHrqe5shvBUcFbmZq9zOQHBoeOhZJu6RQGrDpBgenUm/Am+F3JM2MgQj+rK3Z601fzrL5gLZWtAPH2OBaSVcyw==", + "version": "5.0.4", + "resolved": "https://registry.npmjs.org/isbinaryfile/-/isbinaryfile-5.0.4.tgz", + "integrity": "sha512-YKBKVkKhty7s8rxddb40oOkuP0NbaeXrQvLin6QMHL7Ypiy2RW9LwOVrVgZRyOrhQlayMd9t+D8yDy8MKFTSDQ==", "engines": { - "node": ">= 8.0.0" + "node": ">= 18.0.0" }, "funding": { "url": "https://github.com/sponsors/gjtorikian/" @@ -165,16 +165,16 @@ "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==" }, "node_modules/typescript": { - "version": "5.0.4", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.0.4.tgz", - "integrity": "sha512-cW9T5W9xY37cc+jfEnaUvX91foxtHkza3Nw3wkoF4sSlKn0MONdkdEndig/qPBWXNkmplh3NzayQzCiHM4/hqw==", + "version": "5.7.3", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.7.3.tgz", + "integrity": "sha512-84MVSjMEHP+FQRPy3pX9sTVV/INIex71s9TL2Gm5FG/WG1SqXeKyZ0k7/blY/4FdOzI12CBy1vGc4og/eus0fw==", "dev": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" }, "engines": { - "node": ">=12.20" + "node": ">=14.17" } }, "node_modules/undici-types": { @@ -197,126 +197,5 @@ "webidl-conversions": "^3.0.0" } } - }, - "dependencies": { - "@types/js-yaml": { - "version": "4.0.9", - "resolved": "https://registry.npmjs.org/@types/js-yaml/-/js-yaml-4.0.9.tgz", - "integrity": "sha512-k4MGaQl5TGo/iipqb2UDG2UwjXziSWkh0uysQelTlJpX1qGlpUZYm8PnO4DxG1qBomtJUdYJ6qR6xdIah10JLg==", - "dev": true - }, - "@types/node": { - "version": "22.13.1", - "resolved": 
"https://registry.npmjs.org/@types/node/-/node-22.13.1.tgz", - "integrity": "sha512-jK8uzQlrvXqEU91UxiK5J7pKHyzgnI1Qnl0QDHIgVGuolJhRb9EEl28Cj9b3rGR8B2lhFCtvIm5os8lFnO/1Ew==", - "dev": true, - "requires": { - "undici-types": "~6.20.0" - } - }, - "argparse": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", - "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==" - }, - "binary-extensions": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.3.0.tgz", - "integrity": "sha512-Ceh+7ox5qe7LJuLHoY0feh3pHuUDHAcRUeyL2VYghZwfpkNIy/+8Ocg0a3UuSoYzavmylwuLWQOf3hl0jjMMIw==" - }, - "clone": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/clone/-/clone-2.1.2.tgz", - "integrity": "sha512-3Pe/CF1Nn94hyhIYpjtiLhdCoEoz0DqQ+988E9gmeEdQZlojxnOb74wctFyuwWQHzqyf9X7C7MG8juUpqBJT8w==" - }, - "commander": { - "version": "9.5.0", - "resolved": "https://registry.npmjs.org/commander/-/commander-9.5.0.tgz", - "integrity": "sha512-KRs7WVDKg86PWiuAqhDrAQnTXZKraVcCc6vFdL14qrZ/DcWwuRo7VoiYXalXO7S5GKpqYiVEwCbgFDfxNHKJBQ==" - }, - "common-path-prefix": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/common-path-prefix/-/common-path-prefix-3.0.0.tgz", - "integrity": "sha512-QE33hToZseCH3jS0qN96O/bSh3kaw/h+Tq7ngyY9eWDUnTlTNUyqfqvCXioLe5Na5jFsL78ra/wuBU4iuEgd4w==" - }, - "cross-fetch": { - "version": "3.2.0", - "resolved": "https://registry.npmjs.org/cross-fetch/-/cross-fetch-3.2.0.tgz", - "integrity": "sha512-Q+xVJLoGOeIMXZmbUK4HYk+69cQH6LudR0Vu/pRm2YlU/hDV9CiS0gKUMaWY5f2NeUH9C1nV3bsTlCo0FsTV1Q==", - "requires": { - "node-fetch": "^2.7.0" - } - }, - "deep-object-diff": { - "version": "1.1.9", - "resolved": "https://registry.npmjs.org/deep-object-diff/-/deep-object-diff-1.1.9.tgz", - "integrity": "sha512-Rn+RuwkmkDwCi2/oXOFS9Gsr5lJZu/yTGpK7wAaAIE75CC+LCGEZHpY6VQJa/RoJcrmaA/docWJZvYohlNkWPA==", - "dev": true - }, - 
"ignore": { - "version": "7.0.3", - "resolved": "https://registry.npmjs.org/ignore/-/ignore-7.0.3.tgz", - "integrity": "sha512-bAH5jbK/F3T3Jls4I0SO1hmPR0dKU0a7+SY6n1yzRtG54FLO8d6w/nxLFX2Nb7dBu6cCWXPaAME6cYqFUMmuCA==" - }, - "isbinaryfile": { - "version": "4.0.10", - "resolved": "https://registry.npmjs.org/isbinaryfile/-/isbinaryfile-4.0.10.tgz", - "integrity": "sha512-iHrqe5shvBUcFbmZq9zOQHBoeOhZJu6RQGrDpBgenUm/Am+F3JM2MgQj+rK3Z601fzrL5gLZWtAPH2OBaSVcyw==" - }, - "js-yaml": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz", - "integrity": "sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==", - "requires": { - "argparse": "^2.0.1" - } - }, - "node-cache": { - "version": "5.1.2", - "resolved": "https://registry.npmjs.org/node-cache/-/node-cache-5.1.2.tgz", - "integrity": "sha512-t1QzWwnk4sjLWaQAS8CHgOJ+RAfmHpxFWmc36IWTiWHQfs0w5JDMBS1b1ZxQteo0vVVuWJvIUKHDkkeK7vIGCg==", - "requires": { - "clone": "2.x" - } - }, - "node-fetch": { - "version": "2.7.0", - "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", - "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", - "requires": { - "whatwg-url": "^5.0.0" - } - }, - "tr46": { - "version": "0.0.3", - "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", - "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==" - }, - "typescript": { - "version": "5.0.4", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.0.4.tgz", - "integrity": "sha512-cW9T5W9xY37cc+jfEnaUvX91foxtHkza3Nw3wkoF4sSlKn0MONdkdEndig/qPBWXNkmplh3NzayQzCiHM4/hqw==", - "dev": true - }, - "undici-types": { - "version": "6.20.0", - "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.20.0.tgz", - "integrity": 
"sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg==", - "dev": true - }, - "webidl-conversions": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", - "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==" - }, - "whatwg-url": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", - "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", - "requires": { - "tr46": "~0.0.3", - "webidl-conversions": "^3.0.0" - } - } } } diff --git a/package.json b/package.json index 732f954..444f6ce 100644 --- a/package.json +++ b/package.json @@ -1,15 +1,16 @@ { "name": "linguist-js", - "version": "2.9.0", + "version": "3.0.0-dev", "description": "Analyse languages used in a folder. Powered by GitHub Linguist, although it doesn't need to be installed.", "main": "dist/index.js", + "type": "module", "bin": { "linguist-js": "bin/index.js", "linguist": "bin/index.js" }, "engines": { - "node": ">=12", - "npm": "<9" + "node": ">=20", + "npm": ">=10" }, "scripts": { "download-files": "npx tsx@3 build/download-files", @@ -39,19 +40,19 @@ }, "homepage": "https://github.com/Nixinova/Linguist#readme", "dependencies": { - "binary-extensions": "^2.3.0 <3", - "commander": "^9.5.0 <10", + "binary-extensions": "^3.0.0", + "commander": "^13.1.0", "common-path-prefix": "^3.0.0", - "cross-fetch": "^3.2.0 <4", + "cross-fetch": "^4.1.0", "ignore": "^7.0.3", - "isbinaryfile": "^4.0.10 <5", + "isbinaryfile": "^5.0.4", "js-yaml": "^4.1.0", "node-cache": "^5.1.2" }, "devDependencies": { "@types/js-yaml": "^4.0.9", - "@types/node": "ts5.0", + "@types/node": "ts5.7", "deep-object-diff": "^1.1.9", - "typescript": "~5.0.4 <5.1" + "typescript": "~5.7.3" } } diff --git a/src/cli.ts b/src/cli.ts index e479e13..5e7081f 100644 --- a/src/cli.ts +++ 
b/src/cli.ts @@ -1,11 +1,11 @@ const VERSION = require('../package.json').version; -import FS from 'fs'; -import Path from 'path'; +import FS from 'node:fs'; +import Path from 'node:path'; import { program } from 'commander'; -import linguist from './index'; -import { normPath } from './helpers/norm-path'; +import linguist from './index.js'; +import { normPath } from './helpers/norm-path.js'; const colouredMsg = ([r, g, b]: number[], msg: string): string => `\u001B[${38};2;${r};${g};${b}m${msg}${'\u001b[0m'}`; const hexToRgb = (hex: string): number[] => [parseInt(hex.slice(1, 3), 16), parseInt(hex.slice(3, 5), 16), parseInt(hex.slice(5, 7), 16)]; diff --git a/src/helpers/load-data.ts b/src/helpers/load-data.ts index 8b27a1a..b45e882 100644 --- a/src/helpers/load-data.ts +++ b/src/helpers/load-data.ts @@ -1,5 +1,5 @@ -import FS from 'fs'; -import Path from 'path'; +import FS from 'node:fs'; +import Path from 'node:path'; import fetch from 'cross-fetch'; import Cache from 'node-cache'; diff --git a/src/helpers/norm-path.ts b/src/helpers/norm-path.ts index 1fa9efb..a2eb30a 100644 --- a/src/helpers/norm-path.ts +++ b/src/helpers/norm-path.ts @@ -1,4 +1,4 @@ -import Path from 'path'; +import Path from 'node:path'; export const normPath = function normalisedPath(...inputPaths: string[]) { return Path.join(...inputPaths).replace(/\\/g, '/'); diff --git a/src/helpers/parse-gitattributes.ts b/src/helpers/parse-gitattributes.ts index d243daf..06655b0 100644 --- a/src/helpers/parse-gitattributes.ts +++ b/src/helpers/parse-gitattributes.ts @@ -1,5 +1,5 @@ -import * as T from '../types'; -import { normPath } from './norm-path'; +import * as T from '../types.js'; +import { normPath } from './norm-path.js'; export type FlagAttributes = { 'vendored': boolean | null, diff --git a/src/helpers/read-file.ts b/src/helpers/read-file.ts index fbd4246..1c3b55b 100644 --- a/src/helpers/read-file.ts +++ b/src/helpers/read-file.ts @@ -1,4 +1,4 @@ -import FS from 'fs'; +import FS from 
'node:fs'; /** * Read part of a file on disc. diff --git a/src/helpers/walk-tree.ts b/src/helpers/walk-tree.ts index 2f2d305..e9e14ab 100644 --- a/src/helpers/walk-tree.ts +++ b/src/helpers/walk-tree.ts @@ -1,8 +1,8 @@ -import FS from 'fs'; -import Path from 'path'; +import FS from 'node:fs'; +import Path from 'node:path'; import { Ignore } from 'ignore'; -import parseGitignore from './parse-gitignore'; -import { normPath, normAbsPath } from './norm-path'; +import parseGitignore from './parse-gitignore.js'; +import { normPath, normAbsPath } from './norm-path.js'; let allFiles: Set; let allFolders: Set; diff --git a/src/index.ts b/src/index.ts index 2513141..2393776 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,19 +1,19 @@ -import FS from 'fs'; -import Path from 'path'; +import FS from 'node:fs'; +import Path from 'node:path'; import YAML from 'js-yaml'; import ignore, { Ignore } from 'ignore'; import commonPrefix from 'common-path-prefix'; import binaryData from 'binary-extensions'; import { isBinaryFile } from 'isbinaryfile'; -import walk from './helpers/walk-tree'; -import loadFile, { parseGeneratedDataFile } from './helpers/load-data'; -import readFileChunk from './helpers/read-file'; -import parseAttributes, { FlagAttributes } from './helpers/parse-gitattributes'; -import pcre from './helpers/convert-pcre'; -import { normPath } from './helpers/norm-path'; -import * as T from './types'; -import * as S from './schema'; +import walk from './helpers/walk-tree.js'; +import loadFile, { parseGeneratedDataFile } from './helpers/load-data.js'; +import readFileChunk from './helpers/read-file.js'; +import parseAttributes, { FlagAttributes } from './helpers/parse-gitattributes.js'; +import pcre from './helpers/convert-pcre.js'; +import { normPath } from './helpers/norm-path.js'; +import * as T from './types.js'; +import * as S from './schema.js'; async function analyse(path?: string, opts?: T.Options): Promise async function analyse(paths?: string[], opts?: 
T.Options): Promise @@ -248,13 +248,13 @@ async function analyse(rawPaths?: string | string[], opts: T.Options = {}): Prom const langMatcher = (lang: string) => `\\b${lang.toLowerCase().replace(/\W/g, '\\$&')}(?![\\w#+*]|-\*-)`; // Check for interpreter match if (opts.checkShebang && hasShebang) { - const matchesInterpretor = data.interpreters?.some(interpreter => firstLine!.match(`\\b${interpreter}\\b`)); + const matchesInterpretor = data.interpreters?.some(interpreter => firstLine.match(`\\b${interpreter}\\b`)); if (matchesInterpretor) matches.push(lang); } // Check modeline declaration if (opts.checkModeline && hasModeline) { - const modelineText = firstLine!.toLowerCase().replace(/^.*-\*-(.+)-\*-.*$/, '$1'); + const modelineText = firstLine.toLowerCase().replace(/^.*-\*-(.+)-\*-.*$/, '$1'); const matchesLang = modelineText.match(langMatcher(lang)); const matchesAlias = data.aliases?.some(lang => modelineText.match(langMatcher(lang))); if (matchesLang || matchesAlias) @@ -481,4 +481,4 @@ async function analyse(rawPaths?: string | string[], opts: T.Options = {}): Prom // Return return results; } -export = analyse; +export default analyse; diff --git a/src/schema.ts b/src/schema.ts index 8617ebb..76781ea 100644 --- a/src/schema.ts +++ b/src/schema.ts @@ -1,4 +1,4 @@ -import { Category, Language } from './types' +import { Category, Language } from './types.js' export interface LanguagesScema { [name: string]: { diff --git a/test/folder.js b/test/folder.js index 349c9f2..36919eb 100644 --- a/test/folder.js +++ b/test/folder.js @@ -1,11 +1,14 @@ -const fs = require('fs'); -const linguist = require('..'); -const { updatedDiff } = require('deep-object-diff'); +import FS from 'node:fs'; +import { dirname } from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { updatedDiff } from 'deep-object-diff'; +import linguist from '../dist/index.js'; async function testFolder() { console.info('-'.repeat(11) + '\nFolder test\n' + '-'.repeat(11)); - const 
samplesFolder = __dirname.replace(/\\/g, '/') + '/samples'; - const expectedJson = fs.readFileSync(__dirname + '/expected.json', { encoding: 'utf8' }); + const curFolder = dirname(fileURLToPath(import.meta.url)); + const samplesFolder = curFolder.replace(/\\/g, '/') + '/samples'; + const expectedJson = FS.readFileSync(curFolder + '/expected.json', { encoding: 'utf8' }); const expected = JSON.parse(expectedJson.replace(/~/g, samplesFolder)); const actual = await linguist(samplesFolder); diff --git a/test/perf.js b/test/perf.js index 5d8d134..b75ff20 100644 --- a/test/perf.js +++ b/test/perf.js @@ -1,4 +1,4 @@ -const linguist = require('..'); +import linguist from '../dist/index.js'; async function perfTest() { let time = 0; diff --git a/test/unit.js b/test/unit.js index 0264e1b..c902ec1 100644 --- a/test/unit.js +++ b/test/unit.js @@ -1,4 +1,4 @@ -const linguist = require('..'); +import linguist from '../dist/index.js'; let i = 0; let errors = 0; diff --git a/tsconfig.json b/tsconfig.json index 5dbba49..91264ce 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -9,9 +9,9 @@ /* Examples: https://github.com/tsconfig/bases */ /* Basic Options */ - "target": "es2019", // Node 12 - "module": "commonjs", - "lib": ["es2020"], + "target": "ES2023", + "module": "NodeNext", + "lib": ["ESNext"], //"allowJs": true, //"checkJs": true, //"jsx": "preserve", @@ -46,7 +46,7 @@ //"noPropertyAccessFromIndexSignature": true, /* Module Resolution Options */ - "moduleResolution": "node", + "moduleResolution": "nodenext", "resolveJsonModule": true, //"baseUrl": "./", //"paths": {}, From 1c0ce7e190fcab1e8568c05a64fbd3a382a57372 Mon Sep 17 00:00:00 2001 From: Nixinova Date: Thu, 20 Feb 2025 23:41:42 +1300 Subject: [PATCH 02/15] Move language metadata into `repository` key Stores type, parent, and colour. 
--- readme.md | 58 ++++++++++++++++++++-------------------------------- src/cli.ts | 9 ++++---- src/index.ts | 17 +++++++++------ src/types.ts | 38 ++++++++++++++-------------------- 4 files changed, 53 insertions(+), 69 deletions(-) diff --git a/readme.md b/readme.md index 5ab96ba..46a2963 100644 --- a/readme.md +++ b/readme.md @@ -51,9 +51,9 @@ Running LinguistJS on this folder will return the following JSON: "count": 5, "bytes": 6020, "lines": { - "total": 100, - "content": 90, - "code": 80, + "total": 100, + "content": 90, + "code": 80, }, "results": { "/src/index.ts": "TypeScript", @@ -63,57 +63,43 @@ Running LinguistJS on this folder will return the following JSON: "/x.pluginspec": "Ruby", }, "alternatives": { - "/x.pluginspec": ["XML"], + "/x.pluginspec": ["XML"], }, }, "languages": { "count": 3, "bytes": 6010, "lines": { - "total": 90, - "content": 80, - "code": 70, + "total": 90, + "content": 80, + "code": 70, }, "results": { - "JavaScript": { - "type": "programming", - "bytes": 1000, - "lines": { "total": 49, "content": 49, "code": 44 }, - "color": "#f1e05a" - }, - "Markdown": { - "type": "prose", - "bytes": 3000, - "lines": { "total": 10, "content": 5, "code": 5 }, - "color": "#083fa1" - }, - "Ruby": { - "type": "programming", - "bytes": 10, - "lines": { "total": 1, "content": 1, "code": 1 }, - "color": "#701516" - }, - "TypeScript": { - "type": "programming", - "bytes": 2000, - "lines": { "total": 30, "content": 25, "code": 20 }, - "color": "#2b7489" - }, + "JavaScript": { "bytes": 1000, "lines": { "total": 49, "content": 49, "code": 44 }, }, + "Markdown": { "bytes": 3000, "lines": { "total": 10, "content": 5, "code": 5 }, }, + "Ruby": { "bytes": 10, "lines": { "total": 1, "content": 1, "code": 1 }, }, + "TypeScript": { "bytes": 2000, "lines": { "total": 30, "content": 25, "code": 20 }, }, }, }, "unknown": { "count": 1, "bytes": 10, "lines": { - "total": 10, - "content": 10, - "code": 10, + "total": 10, + "content": 10, + "code": 10, }, "filenames": { 
"no-lang": 10, }, "extensions": {}, }, + "repository": { + "JavaScript": { "type": "programming", "color": "#f1e05a" }, + "Markdown": { "type": "prose", "color": "#083fa1" }, + "Ruby": { "type": "programming", "color": "#701516" }, + "TypeScript": { "type": "programming", "color": "#2b7489" }, + } } ``` @@ -134,13 +120,13 @@ const linguist = require('linguist-js'); // Analyse folder on disc const folder = './src'; const options = { keepVendored: false, quick: false }; -const { files, languages, unknown } = await linguist(folder, options); +const { files, languages, unknown, repository } = await linguist(folder, options); // Analyse file content from raw input const fileNames = ['file1.ts', 'file2.ts', 'ignoreme.js']; const fileContent = ['#!/usr/bin/env node', 'console.log("Example");', '"ignored"']; const options = { ignoredFiles: ['ignore*'] }; -const { files, languages, unknown } = await linguist(fileNames, { fileContent, ...options }); +const { files, languages, unknown, repository } = await linguist(fileNames, { fileContent, ...options }); ``` - `linguist(entry?, opts?)` (default export): diff --git a/src/cli.ts b/src/cli.ts index 5e7081f..d159079 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -66,7 +66,7 @@ if (args.analyze) (async () => { // Fetch language data const root = args.analyze === true ? '.' : args.analyze; const data = await linguist(root, args); - const { files, languages, unknown } = data; + const { files, languages, unknown, repository } = data; // Print output if (!args.json) { // Ignore languages with a bytes/% size less than the declared min size @@ -98,7 +98,7 @@ if (args.analyze) (async () => { } } if (other.bytes) { - languages.results["Other"] = { ...other, type: null! 
}; + languages.results["Other"] = other; } } @@ -121,7 +121,8 @@ if (args.analyze) (async () => { } } // List parsed results - for (const [lang, { bytes, lines, color }] of sortedEntries) { + for (const [lang, { bytes, lines }] of sortedEntries) { + const colour = hexToRgb(repository[lang].color ?? '#ededed'); const percent = (bytes: number) => bytes / (totalBytes || 1) * 100; const fmtd = { index: (++count).toString().padStart(2, ' '), @@ -129,7 +130,7 @@ if (args.analyze) (async () => { percent: percent(bytes).toFixed(2).padStart(5, ' '), bytes: bytes.toLocaleString().padStart(10, ' '), loc: lines.code.toLocaleString().padStart(10, ' '), - icon: colouredMsg(hexToRgb(color ?? '#ededed'), '\u2588'), + icon: colouredMsg(colour, '\u2588'), }; console.log(` ${fmtd.index}. ${fmtd.icon} ${fmtd.lang} ${fmtd.percent}% ${fmtd.bytes} B ${fmtd.loc} LOC`); diff --git a/src/index.ts b/src/index.ts index 2393776..e2df651 100644 --- a/src/index.ts +++ b/src/index.ts @@ -50,6 +50,7 @@ async function analyse(rawPaths?: string | string[], opts: T.Options = {}): Prom files: { count: 0, bytes: 0, lines: { total: 0, content: 0, code: 0 }, results: {}, alternatives: {} }, languages: { count: 0, bytes: 0, lines: { total: 0, content: 0, code: 0 }, results: {} }, unknown: { count: 0, bytes: 0, lines: { total: 0, content: 0, code: 0 }, extensions: {}, filenames: {} }, + repository: {}, }; // Set a common root path so that vendor paths do not incorrectly match parent folders @@ -393,7 +394,7 @@ async function analyse(rawPaths?: string | string[], opts: T.Options = {}): Prom delete results.languages.results[lang]; } for (const category of hiddenCategories) { - for (const [lang, { type }] of Object.entries(results.languages.results)) { + for (const [lang, { type }] of Object.entries(results.repository)) { if (type === category) { delete results.languages.results[lang]; } @@ -438,13 +439,17 @@ async function analyse(rawPaths?: string | string[], opts: T.Options = {}): Prom 
results.files.lines.code += loc.code; // Add results to 'languages' section if language match found, or 'unknown' section otherwise if (lang) { - const { type } = langData[lang]; + // update language in repository if not yet present + if (!results.repository[lang]) { + const { type, color } = langData[lang]; + results.repository[lang] = { type, color }; + if (opts.childLanguages) { + results.repository[lang].parent = langData[lang].group; + } + } // set default if unset - results.languages.results[lang] ??= { type, bytes: 0, lines: { total: 0, content: 0, code: 0 }, color: langData[lang].color }; + results.languages.results[lang] ??= { bytes: 0, lines: { total: 0, content: 0, code: 0 } }; // apply results to 'languages' section - if (opts.childLanguages) { - results.languages.results[lang].parent = langData[lang].group; - } results.languages.results[lang].bytes += fileSize; results.languages.bytes += fileSize; results.languages.results[lang].lines.total += loc.total; diff --git a/src/types.ts b/src/types.ts index 077e9d2..c5e9f50 100644 --- a/src/types.ts +++ b/src/types.ts @@ -30,15 +30,17 @@ export interface Options { checkModeline?: boolean } +type LinesOfCode = { + total: Integer + content: Integer + code: Integer +} + export interface Results { files: { count: Integer bytes: Bytes - lines: { - total: Integer - content: Integer - code: Integer - } + lines: LinesOfCode /** Note: Results use slashes as delimiters even on Windows. 
*/ results: Record alternatives: Record @@ -46,32 +48,22 @@ export interface Results { languages: { count: Integer bytes: Bytes - lines: { - total: Integer - content: Integer - code: Integer - } + lines: LinesOfCode results: Record } unknown: { count: Integer bytes: Bytes - lines: { - total: Integer - content: Integer - code: Integer - } + lines: LinesOfCode extensions: Record filenames: Record } + repository: Record } From 369bd4cf7a8dd0777616e9de253fa203324b836d Mon Sep 17 00:00:00 2001 From: Nixinova Date: Thu, 20 Feb 2025 23:57:56 +1300 Subject: [PATCH 03/15] Remove userland fetch package Fetch API has native support. --- package-lock.json | 47 ---------------------------------------- package.json | 1 - src/helpers/load-data.ts | 1 - 3 files changed, 49 deletions(-) diff --git a/package-lock.json b/package-lock.json index d083edc..6e78c96 100644 --- a/package-lock.json +++ b/package-lock.json @@ -12,7 +12,6 @@ "binary-extensions": "^3.0.0", "commander": "^13.1.0", "common-path-prefix": "^3.0.0", - "cross-fetch": "^4.1.0", "ignore": "^7.0.3", "isbinaryfile": "^5.0.4", "js-yaml": "^4.1.0", @@ -85,14 +84,6 @@ "resolved": "https://registry.npmjs.org/common-path-prefix/-/common-path-prefix-3.0.0.tgz", "integrity": "sha512-QE33hToZseCH3jS0qN96O/bSh3kaw/h+Tq7ngyY9eWDUnTlTNUyqfqvCXioLe5Na5jFsL78ra/wuBU4iuEgd4w==" }, - "node_modules/cross-fetch": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/cross-fetch/-/cross-fetch-4.1.0.tgz", - "integrity": "sha512-uKm5PU+MHTootlWEY+mZ4vvXoCn4fLQxT9dSc1sXVMSFkINTJVN8cAQROpwcKm8bJ/c7rgZVIBWzH5T78sNZZw==", - "dependencies": { - "node-fetch": "^2.7.0" - } - }, "node_modules/deep-object-diff": { "version": "1.1.9", "resolved": "https://registry.npmjs.org/deep-object-diff/-/deep-object-diff-1.1.9.tgz", @@ -140,30 +131,6 @@ "node": ">= 8.0.0" } }, - "node_modules/node-fetch": { - "version": "2.7.0", - "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", - "integrity": 
"sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", - "dependencies": { - "whatwg-url": "^5.0.0" - }, - "engines": { - "node": "4.x || >=6.0.0" - }, - "peerDependencies": { - "encoding": "^0.1.0" - }, - "peerDependenciesMeta": { - "encoding": { - "optional": true - } - } - }, - "node_modules/tr46": { - "version": "0.0.3", - "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", - "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==" - }, "node_modules/typescript": { "version": "5.7.3", "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.7.3.tgz", @@ -182,20 +149,6 @@ "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.20.0.tgz", "integrity": "sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg==", "dev": true - }, - "node_modules/webidl-conversions": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", - "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==" - }, - "node_modules/whatwg-url": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", - "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", - "dependencies": { - "tr46": "~0.0.3", - "webidl-conversions": "^3.0.0" - } } } } diff --git a/package.json b/package.json index 444f6ce..14c75d4 100644 --- a/package.json +++ b/package.json @@ -43,7 +43,6 @@ "binary-extensions": "^3.0.0", "commander": "^13.1.0", "common-path-prefix": "^3.0.0", - "cross-fetch": "^4.1.0", "ignore": "^7.0.3", "isbinaryfile": "^5.0.4", "js-yaml": "^4.1.0", diff --git a/src/helpers/load-data.ts b/src/helpers/load-data.ts index b45e882..a8e4e89 100644 --- a/src/helpers/load-data.ts +++ b/src/helpers/load-data.ts @@ -1,6 +1,5 @@ import 
FS from 'node:fs'; import Path from 'node:path'; -import fetch from 'cross-fetch'; import Cache from 'node-cache'; const cache = new Cache({}); From 22b2c748d0f52c2b17223c35a6d5f7ea0c5a9c19 Mon Sep 17 00:00:00 2001 From: Nixinova Date: Fri, 21 Feb 2025 00:47:11 +1300 Subject: [PATCH 04/15] Remove `analyse` CLI arg alias Doesn't work with new version of commander. --- src/cli.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/cli.ts b/src/cli.ts index d159079..dab338e 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -3,7 +3,6 @@ const VERSION = require('../package.json').version; import FS from 'node:fs'; import Path from 'node:path'; import { program } from 'commander'; - import linguist from './index.js'; import { normPath } from './helpers/norm-path.js'; @@ -14,7 +13,7 @@ program .name('linguist') .usage('--analyze [] []') - .option('-a|--analyze|--analyse [folders...]', 'Analyse the languages of all files in a folder') + .option('-a|--analyze [folders...]', 'Analyse the languages of all files in a folder') .option('-i|--ignoredFiles ', `A list of file path globs to ignore`) .option('-l|--ignoredLanguages ', `A list of languages to ignore`) .option('-c|--categories ', 'Language categories to include in output') From 56979307bee059dc05ba41f8080a9e992efb6835 Mon Sep 17 00:00:00 2001 From: Nixinova Date: Fri, 21 Feb 2025 00:58:49 +1300 Subject: [PATCH 05/15] Manual JSON import Using `import` logs a warning in Node v22. Import it manually by reading the JSON file to avoid this warning. 
--- bin/index.js | 2 +- src/cli.ts | 5 +++-- src/index.ts | 5 ++++- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/bin/index.js b/bin/index.js index e8ee9bf..7c3d154 100644 --- a/bin/index.js +++ b/bin/index.js @@ -1,2 +1,2 @@ #!/usr/bin/env node -require('../dist/cli.js'); +import '../dist/cli.js'; diff --git a/src/cli.ts b/src/cli.ts index dab338e..b07cdcd 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -1,11 +1,12 @@ -const VERSION = require('../package.json').version; - import FS from 'node:fs'; import Path from 'node:path'; import { program } from 'commander'; import linguist from './index.js'; import { normPath } from './helpers/norm-path.js'; +const packageJson = JSON.parse(FS.readFileSync(new URL('../package.json', import.meta.url), "utf-8")); +const VERSION = packageJson.version; + const colouredMsg = ([r, g, b]: number[], msg: string): string => `\u001B[${38};2;${r};${g};${b}m${msg}${'\u001b[0m'}`; const hexToRgb = (hex: string): number[] => [parseInt(hex.slice(1, 3), 16), parseInt(hex.slice(3, 5), 16), parseInt(hex.slice(5, 7), 16)]; diff --git a/src/index.ts b/src/index.ts index e2df651..bf0c807 100644 --- a/src/index.ts +++ b/src/index.ts @@ -3,7 +3,6 @@ import Path from 'node:path'; import YAML from 'js-yaml'; import ignore, { Ignore } from 'ignore'; import commonPrefix from 'common-path-prefix'; -import binaryData from 'binary-extensions'; import { isBinaryFile } from 'isbinaryfile'; import walk from './helpers/walk-tree.js'; @@ -15,6 +14,10 @@ import { normPath } from './helpers/norm-path.js'; import * as T from './types.js'; import * as S from './schema.js'; +const binaryData = JSON.parse( + FS.readFileSync(new URL('../node_modules/binary-extensions/binary-extensions.json', import.meta.url), "utf-8") +) as string[]; + async function analyse(path?: string, opts?: T.Options): Promise async function analyse(paths?: string[], opts?: T.Options): Promise async function analyse(rawPaths?: string | string[], opts: T.Options = {}): Promise { From 
f8b73620657fefd17c2f996d4190b88860809ca9 Mon Sep 17 00:00:00 2001 From: Nixinova Date: Sat, 22 Feb 2025 16:22:57 +1300 Subject: [PATCH 06/15] Remove `code` lines result Return only total and content line counts. Can't perform proper lines of code analysis with regex only: too complicated and inaccurate, and would not be performant to implement properly. --- readme.md | 11 ++++------- src/cli.ts | 5 ++--- src/index.ts | 20 ++++++-------------- src/types.ts | 1 - test/expected.json | 20 ++++++++++---------- 5 files changed, 22 insertions(+), 35 deletions(-) diff --git a/readme.md b/readme.md index 46a2963..65dd03d 100644 --- a/readme.md +++ b/readme.md @@ -53,7 +53,6 @@ Running LinguistJS on this folder will return the following JSON: "lines": { "total": 100, "content": 90, - "code": 80, }, "results": { "/src/index.ts": "TypeScript", @@ -72,13 +71,12 @@ Running LinguistJS on this folder will return the following JSON: "lines": { "total": 90, "content": 80, - "code": 70, }, "results": { - "JavaScript": { "bytes": 1000, "lines": { "total": 49, "content": 49, "code": 44 }, }, - "Markdown": { "bytes": 3000, "lines": { "total": 10, "content": 5, "code": 5 }, }, - "Ruby": { "bytes": 10, "lines": { "total": 1, "content": 1, "code": 1 }, }, - "TypeScript": { "bytes": 2000, "lines": { "total": 30, "content": 25, "code": 20 }, }, + "JavaScript": { "bytes": 1000, "lines": { "total": 49, "content": 49 }, }, + "Markdown": { "bytes": 3000, "lines": { "total": 10, "content": 5 }, }, + "Ruby": { "bytes": 10, "lines": { "total": 1, "content": 1 }, }, + "TypeScript": { "bytes": 2000, "lines": { "total": 30, "content": 25 }, }, }, }, "unknown": { @@ -87,7 +85,6 @@ Running LinguistJS on this folder will return the following JSON: "lines": { "total": 10, "content": 10, - "code": 10, }, "filenames": { "no-lang": 10, diff --git a/src/cli.ts b/src/cli.ts index b07cdcd..64387f5 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -86,13 +86,12 @@ if (args.analyze) (async () => { const other = { 
bytes: 0, lines: { total: 0, content: 0, code: 0 } }; // Apply specified minimums: delete language results that do not reach the threshold for (const [lang, data] of Object.entries(languages.results)) { - const checkUnit = checkBytes ? data.bytes : data.lines.code; + const checkUnit = checkBytes ? data.bytes : data.lines.content; if (checkUnit < minBytesSize) { // Add to 'other' count other.bytes += data.bytes; other.lines.total += data.lines.total; other.lines.content += data.lines.content; - other.lines.code += data.lines.code; // Remove language result delete languages.results[lang]; } @@ -129,7 +128,7 @@ if (args.analyze) (async () => { lang: lang.padEnd(24, ' '), percent: percent(bytes).toFixed(2).padStart(5, ' '), bytes: bytes.toLocaleString().padStart(10, ' '), - loc: lines.code.toLocaleString().padStart(10, ' '), + loc: lines.content.toLocaleString().padStart(10, ' '), icon: colouredMsg(colour, '\u2588'), }; console.log(` ${fmtd.index}. ${fmtd.icon} ${fmtd.lang} ${fmtd.percent}% ${fmtd.bytes} B ${fmtd.loc} LOC`); diff --git a/src/index.ts b/src/index.ts index bf0c807..158d8eb 100644 --- a/src/index.ts +++ b/src/index.ts @@ -50,9 +50,9 @@ async function analyse(rawPaths?: string | string[], opts: T.Options = {}): Prom const extensions: Record = {}; const globOverrides: Record = {}; const results: T.Results = { - files: { count: 0, bytes: 0, lines: { total: 0, content: 0, code: 0 }, results: {}, alternatives: {} }, - languages: { count: 0, bytes: 0, lines: { total: 0, content: 0, code: 0 }, results: {} }, - unknown: { count: 0, bytes: 0, lines: { total: 0, content: 0, code: 0 }, extensions: {}, filenames: {} }, + files: { count: 0, bytes: 0, lines: { total: 0, content: 0 }, results: {}, alternatives: {} }, + languages: { count: 0, bytes: 0, lines: { total: 0, content: 0 }, results: {} }, + unknown: { count: 0, bytes: 0, lines: { total: 0, content: 0 }, extensions: {}, filenames: {} }, repository: {}, }; @@ -424,22 +424,17 @@ async function analyse(rawPaths?: 
string | string[], opts: T.Options = {}): Prom // Calculate file size const fileSize = manualFileContent[files.indexOf(file)]?.length ?? FS.statSync(file).size; // Calculate lines of code - const loc = { total: 0, content: 0, code: 0 }; + const loc = { total: 0, content: 0 }; if (opts.calculateLines) { const fileContent = (manualFileContent[files.indexOf(file)] ?? FS.readFileSync(file).toString()) ?? ''; const allLines = fileContent.split(/\r?\n/gm); loc.total = allLines.length; loc.content = allLines.filter(line => line.trim().length > 0).length; - const codeLines = fileContent - .replace(/^\s*(\/\/|# |;|--).+/gm, '') - .replace(/\/\*.+\*\/|/sg, '') - loc.code = codeLines.split(/\r?\n/gm).filter(line => line.trim().length > 0).length; } // Apply to files totals results.files.bytes += fileSize; results.files.lines.total += loc.total; results.files.lines.content += loc.content; - results.files.lines.code += loc.code; // Add results to 'languages' section if language match found, or 'unknown' section otherwise if (lang) { // update language in repository if not yet present @@ -451,16 +446,14 @@ async function analyse(rawPaths?: string | string[], opts: T.Options = {}): Prom } } // set default if unset - results.languages.results[lang] ??= { bytes: 0, lines: { total: 0, content: 0, code: 0 } }; + results.languages.results[lang] ??= { bytes: 0, lines: { total: 0, content: 0 } }; // apply results to 'languages' section results.languages.results[lang].bytes += fileSize; results.languages.bytes += fileSize; results.languages.results[lang].lines.total += loc.total; results.languages.results[lang].lines.content += loc.content; - results.languages.results[lang].lines.code += loc.code; results.languages.lines.total += loc.total; results.languages.lines.content += loc.content; - results.languages.lines.code += loc.code; } else { const ext = Path.extname(file); @@ -472,13 +465,12 @@ async function analyse(rawPaths?: string | string[], opts: T.Options = {}): Prom 
results.unknown.bytes += fileSize; results.unknown.lines.total += loc.total; results.unknown.lines.content += loc.content; - results.unknown.lines.code += loc.code; } } // Set lines output to NaN when line calculation is disabled if (opts.calculateLines === false) { - results.files.lines = { total: NaN, content: NaN, code: NaN } + results.files.lines = { total: NaN, content: NaN } } // Set counts diff --git a/src/types.ts b/src/types.ts index c5e9f50..32f9d94 100644 --- a/src/types.ts +++ b/src/types.ts @@ -33,7 +33,6 @@ export interface Options { type LinesOfCode = { total: Integer content: Integer - code: Integer } export interface Results { diff --git a/test/expected.json b/test/expected.json index 90c7971..25950b1 100644 --- a/test/expected.json +++ b/test/expected.json @@ -2,7 +2,7 @@ "files": { "count": 12, "bytes": 199, - "lines": { "total": 27, "content": 16, "code": 11 }, + "lines": { "total": 27, "content": 16 }, "results": { "~/al.al": "Perl", "~/alternatives.asc": "AGS Script", @@ -25,20 +25,20 @@ "count": 8, "bytes": 190, "results": { - "Perl": { "type": "programming", "bytes": 0, "lines": { "total": 1, "content": 0, "code": 0 },"color": "#0298c3" }, - "AGS Script": { "type": "programming", "bytes": 14, "lines": { "total": 2, "content": 1, "code": 1 },"color": "#B9D9FF" }, - "JSON": { "type": "data", "bytes": 8, "lines": { "total": 4, "content": 2, "code": 2 },"color": "#292929"}, - "JavaScript": { "type": "programming", "bytes": 23, "lines": { "total": 4, "content": 3, "code": 3 },"color": "#f1e05a" }, - "Text": { "type": "prose", "bytes": 0, "lines": { "total": 1, "content": 0, "code": 0 } }, - "C": { "type": "programming", "bytes": 130, "lines": { "total": 10, "content": 8, "code": 4 }, "color": "#555555"}, - "C++": { "type": "programming", "bytes": 15, "lines": { "total": 2, "content": 1, "code": 0 }, "color": "#f34b7d" }, - "TOML": { "type": "data", "bytes": 0, "lines": { "total": 1, "content": 0, "code": 0 }, "color": "#9c4221" } + "Perl": { 
"type": "programming", "bytes": 0, "lines": { "total": 1, "content": 0 },"color": "#0298c3" }, + "AGS Script": { "type": "programming", "bytes": 14, "lines": { "total": 2, "content": 1 },"color": "#B9D9FF" }, + "JSON": { "type": "data", "bytes": 8, "lines": { "total": 4, "content": 2 },"color": "#292929"}, + "JavaScript": { "type": "programming", "bytes": 23, "lines": { "total": 4, "content": 3 },"color": "#f1e05a" }, + "Text": { "type": "prose", "bytes": 0, "lines": { "total": 1, "content": 0 } }, + "C": { "type": "programming", "bytes": 130, "lines": { "total": 10, "content": 8 }, "color": "#555555"}, + "C++": { "type": "programming", "bytes": 15, "lines": { "total": 2, "content": 1 }, "color": "#f34b7d" }, + "TOML": { "type": "data", "bytes": 0, "lines": { "total": 1, "content": 0 }, "color": "#9c4221" } } }, "unknown": { "count": 1, "bytes": 9, - "lines": { "total": 2, "content": 1, "code": 1 }, + "lines": { "total": 2, "content": 1 }, "extensions": {}, "filenames": { "unknown": 9 From 5b77e9173df1f97e717e27714bd32c5a49fdb2b3 Mon Sep 17 00:00:00 2001 From: Nixinova Date: Sat, 22 Feb 2025 16:32:28 +1300 Subject: [PATCH 07/15] Tweak modeline parsing Make it more performant and accurate. 
--- src/index.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/index.ts b/src/index.ts index 158d8eb..c085562 100644 --- a/src/index.ts +++ b/src/index.ts @@ -244,8 +244,9 @@ async function analyse(rawPaths?: string | string[], opts: T.Options = {}): Prom if (firstLine === null) continue; // Check first line for explicit classification + const modelineRegex = /-\*-|(?:syntax|filetype|ft)\s*=/; const hasShebang = opts.checkShebang && /^#!/.test(firstLine); - const hasModeline = opts.checkModeline && /-\*-|(syntax|filetype|ft)\s*=/.test(firstLine); + const hasModeline = opts.checkModeline && modelineRegex.test(firstLine); if (!opts.quick && (hasShebang || hasModeline)) { const matches = []; for (const [lang, data] of Object.entries(langData)) { @@ -258,7 +259,7 @@ async function analyse(rawPaths?: string | string[], opts: T.Options = {}): Prom } // Check modeline declaration if (opts.checkModeline && hasModeline) { - const modelineText = firstLine.toLowerCase().replace(/^.*-\*-(.+)-\*-.*$/, '$1'); + const modelineText = firstLine.toLowerCase().split(modelineRegex)[1]; const matchesLang = modelineText.match(langMatcher(lang)); const matchesAlias = data.aliases?.some(lang => modelineText.match(langMatcher(lang))); if (matchesLang || matchesAlias) From c8e03914559fd143829108c6201674da6ad8282d Mon Sep 17 00:00:00 2001 From: Nixinova Date: Sat, 22 Feb 2025 16:40:01 +1300 Subject: [PATCH 08/15] Add `count` property per language result --- src/cli.ts | 3 ++- src/index.ts | 3 ++- src/types.ts | 1 + test/expected.json | 65 +++++++++++++++++++++++++++++++++++++++------- 4 files changed, 61 insertions(+), 11 deletions(-) diff --git a/src/cli.ts b/src/cli.ts index 64387f5..bd87f71 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -83,12 +83,13 @@ if (args.analyze) (async () => { 'loc': n => n, }; const minBytesSize = conversionFactors[minSizeUnit](+minSizeAmt); - const other = { bytes: 0, lines: { total: 0, content: 0, code: 0 } }; + const other = { count: 
0, bytes: 0, lines: { total: 0, content: 0, code: 0 } }; // Apply specified minimums: delete language results that do not reach the threshold for (const [lang, data] of Object.entries(languages.results)) { const checkUnit = checkBytes ? data.bytes : data.lines.content; if (checkUnit < minBytesSize) { // Add to 'other' count + other.count++; other.bytes += data.bytes; other.lines.total += data.lines.total; other.lines.content += data.lines.content; diff --git a/src/index.ts b/src/index.ts index c085562..e43ff12 100644 --- a/src/index.ts +++ b/src/index.ts @@ -447,8 +447,9 @@ async function analyse(rawPaths?: string | string[], opts: T.Options = {}): Prom } } // set default if unset - results.languages.results[lang] ??= { bytes: 0, lines: { total: 0, content: 0 } }; + results.languages.results[lang] ??= { count: 0, bytes: 0, lines: { total: 0, content: 0 } }; // apply results to 'languages' section + results.languages.results[lang].count++; results.languages.results[lang].bytes += fileSize; results.languages.bytes += fileSize; results.languages.results[lang].lines.total += loc.total; diff --git a/src/types.ts b/src/types.ts index 32f9d94..e9938d6 100644 --- a/src/types.ts +++ b/src/types.ts @@ -49,6 +49,7 @@ export interface Results { bytes: Bytes lines: LinesOfCode results: Record diff --git a/test/expected.json b/test/expected.json index 25950b1..27c7604 100644 --- a/test/expected.json +++ b/test/expected.json @@ -18,21 +18,68 @@ "~/unknown": null }, "alternatives": { - "~/alternatives.asc": [ "AsciiDoc", "Public Key" ] + "~/alternatives.asc": ["AsciiDoc", "Public Key"] } }, "languages": { "count": 8, "bytes": 190, "results": { - "Perl": { "type": "programming", "bytes": 0, "lines": { "total": 1, "content": 0 },"color": "#0298c3" }, - "AGS Script": { "type": "programming", "bytes": 14, "lines": { "total": 2, "content": 1 },"color": "#B9D9FF" }, - "JSON": { "type": "data", "bytes": 8, "lines": { "total": 4, "content": 2 },"color": "#292929"}, - "JavaScript": { 
"type": "programming", "bytes": 23, "lines": { "total": 4, "content": 3 },"color": "#f1e05a" }, - "Text": { "type": "prose", "bytes": 0, "lines": { "total": 1, "content": 0 } }, - "C": { "type": "programming", "bytes": 130, "lines": { "total": 10, "content": 8 }, "color": "#555555"}, - "C++": { "type": "programming", "bytes": 15, "lines": { "total": 2, "content": 1 }, "color": "#f34b7d" }, - "TOML": { "type": "data", "bytes": 0, "lines": { "total": 1, "content": 0 }, "color": "#9c4221" } + "Perl": { + "count": 1, + "type": "programming", + "bytes": 0, + "lines": { "total": 1, "content": 0 }, + "color": "#0298c3" + }, + "AGS Script": { + "count": 1, + "type": "programming", + "bytes": 14, + "lines": { "total": 2, "content": 1 }, + "color": "#B9D9FF" + }, + "JSON": { + "count": 2, + "type": "data", + "bytes": 8, + "lines": { "total": 4, "content": 2 }, + "color": "#292929" + }, + "JavaScript": { + "count": 3, + "type": "programming", + "bytes": 23, + "lines": { "total": 4, "content": 3 }, + "color": "#f1e05a" + }, + "Text": { + "count": 1, + "type": "prose", + "bytes": 0, + "lines": { "total": 1, "content": 0 } + }, + "C": { + "count": 1, + "type": "programming", + "bytes": 130, + "lines": { "total": 10, "content": 8 }, + "color": "#555555" + }, + "C++": { + "count": 1, + "type": "programming", + "bytes": 15, + "lines": { "total": 2, "content": 1 }, + "color": "#f34b7d" + }, + "TOML": { + "count": 1, + "type": "data", + "bytes": 0, + "lines": { "total": 1, "content": 0 }, + "color": "#9c4221" + } } }, "unknown": { From ec78248d51c2d10289f964716df9944587c897ca Mon Sep 17 00:00:00 2001 From: Nixinova Date: Mon, 24 Feb 2025 20:49:51 +1300 Subject: [PATCH 09/15] Change input schema Manual file content is now done as just the first argument. 
--- src/index.ts | 24 ++++++++++++++++-------- src/types.ts | 1 - test/unit.js | 2 +- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/src/index.ts b/src/index.ts index e43ff12..dc8fd8e 100644 --- a/src/index.ts +++ b/src/index.ts @@ -20,10 +20,18 @@ const binaryData = JSON.parse( async function analyse(path?: string, opts?: T.Options): Promise async function analyse(paths?: string[], opts?: T.Options): Promise -async function analyse(rawPaths?: string | string[], opts: T.Options = {}): Promise { - const useRawContent = opts.fileContent !== undefined; - const input = [rawPaths ?? []].flat(); - const manualFileContent = [opts.fileContent ?? []].flat(); +async function analyse(content?: Record, opts?: T.Options): Promise +async function analyse(rawInput?: string | string[] | Record, opts: T.Options = {}): Promise { + const inputs = { + path: typeof rawInput === 'string' ? rawInput : null, + paths: Array.isArray(rawInput) ? rawInput : null, + content: typeof rawInput === 'object' && !Array.isArray(rawInput) ? rawInput : null, + }; + const inputPaths = inputs.paths ?? (inputs.path ? [inputs.path] : null); + const inputContent = inputs.content; + const useRawContent = inputContent !== null; + + const input = useRawContent ? Object.keys(inputContent) : inputPaths ?? []; // Normalise input option arguments opts = { @@ -233,7 +241,7 @@ async function analyse(rawPaths?: string | string[], opts: T.Options = {}): Prom // Check first line for readability let firstLine: string | null; if (useRawContent) { - firstLine = manualFileContent[files.indexOf(file)]?.split('\n')[0] ?? null; + firstLine = inputContent[file]?.split('\n')[0] ?? null; } else if (FS.existsSync(file) && !FS.lstatSync(file).isDirectory()) { firstLine = await readFileChunk(file, true).catch(() => null); @@ -353,7 +361,7 @@ async function analyse(rawPaths?: string | string[], opts: T.Options = {}): Prom } // Check file contents and apply heuristic patterns - const fileContent = opts.fileContent ? 
manualFileContent[files.indexOf(file)] : await readFileChunk(file).catch(() => null); + const fileContent = useRawContent ? inputContent[file] : await readFileChunk(file).catch(() => null); // Skip if file read errors if (fileContent === null) continue; @@ -423,11 +431,11 @@ async function analyse(rawPaths?: string | string[], opts: T.Options = {}): Prom for (const [file, lang] of Object.entries(results.files.results)) { if (lang && !langData[lang]) continue; // Calculate file size - const fileSize = manualFileContent[files.indexOf(file)]?.length ?? FS.statSync(file).size; + const fileSize = useRawContent ? inputContent[file]?.length : FS.statSync(file).size; // Calculate lines of code const loc = { total: 0, content: 0 }; if (opts.calculateLines) { - const fileContent = (manualFileContent[files.indexOf(file)] ?? FS.readFileSync(file).toString()) ?? ''; + const fileContent = useRawContent ? inputContent[file] : FS.readFileSync(file).toString(); const allLines = fileContent.split(/\r?\n/gm); loc.total = allLines.length; loc.content = allLines.filter(line => line.trim().length > 0).length; diff --git a/src/types.ts b/src/types.ts index e9938d6..3a90569 100644 --- a/src/types.ts +++ b/src/types.ts @@ -11,7 +11,6 @@ export type AbsFolder = string & {} export type FileGlob = string & {} export interface Options { - fileContent?: string | string[] ignoredFiles?: string[] ignoredLanguages?: Language[] categories?: Category[] diff --git a/test/unit.js b/test/unit.js index c902ec1..c5fc34b 100644 --- a/test/unit.js +++ b/test/unit.js @@ -8,7 +8,7 @@ function desc(text) { } async function test([filename, fileContent = ''], [type, testVal]) { - const actual = await linguist(filename, { fileContent, childLanguages: true }); + const actual = await linguist({ [filename]: fileContent }, { childLanguages: true }); const testContent = { 'files': actual.files.results[filename], 'size': actual.files.bytes, From 7e52945d0fcea7bd3187e8aba3bfc131ba01e142 Mon Sep 17 00:00:00 2001 From: 
Nixinova Date: Tue, 9 Sep 2025 20:04:29 +1200 Subject: [PATCH 10/15] Update package setup --- build/download-files.ts | 2 +- package-lock.json | 61 ++++++++++++++++++++-------------------- package.json | 16 +++++------ src/helpers/load-data.ts | 4 ++- 4 files changed, 43 insertions(+), 40 deletions(-) diff --git a/build/download-files.ts b/build/download-files.ts index 5676638..a4008d6 100644 --- a/build/download-files.ts +++ b/build/download-files.ts @@ -4,7 +4,7 @@ import FS from 'fs'; import Path from 'path'; import YAML from 'js-yaml'; -import loadFile, { parseGeneratedDataFile } from '../src/helpers/load-data'; +import loadFile, { parseGeneratedDataFile } from '../src/helpers/load-data.ts'; async function writeFile(filename: string) { const filePath = Path.resolve('ext', filename); diff --git a/package-lock.json b/package-lock.json index 6e78c96..dd7f22e 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,11 +9,11 @@ "version": "3.0.0-dev", "license": "ISC", "dependencies": { - "binary-extensions": "^3.0.0", - "commander": "^13.1.0", + "binary-extensions": "^3.1.0", + "commander": "^14.0.0", "common-path-prefix": "^3.0.0", - "ignore": "^7.0.3", - "isbinaryfile": "^5.0.4", + "ignore": "^7.0.5", + "isbinaryfile": "^5.0.6", "js-yaml": "^4.1.0", "node-cache": "^5.1.2" }, @@ -23,12 +23,12 @@ }, "devDependencies": { "@types/js-yaml": "^4.0.9", - "@types/node": "ts5.7", + "@types/node": "^24.3.1", "deep-object-diff": "^1.1.9", - "typescript": "~5.7.3" + "typescript": "^5.9.2" }, "engines": { - "node": ">=20", + "node": ">=24", "npm": ">=10" } }, @@ -39,12 +39,12 @@ "dev": true }, "node_modules/@types/node": { - "version": "22.13.1", - "resolved": "https://registry.npmjs.org/@types/node/-/node-22.13.1.tgz", - "integrity": "sha512-jK8uzQlrvXqEU91UxiK5J7pKHyzgnI1Qnl0QDHIgVGuolJhRb9EEl28Cj9b3rGR8B2lhFCtvIm5os8lFnO/1Ew==", + "version": "24.3.1", + "resolved": "https://registry.npmjs.org/@types/node/-/node-24.3.1.tgz", + "integrity": 
"sha512-3vXmQDXy+woz+gnrTvuvNrPzekOi+Ds0ReMxw0LzBiK3a+1k0kQn9f2NWk+lgD4rJehFUmYy2gMhJ2ZI+7YP9g==", "dev": true, "dependencies": { - "undici-types": "~6.20.0" + "undici-types": "~7.10.0" } }, "node_modules/argparse": { @@ -53,9 +53,9 @@ "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==" }, "node_modules/binary-extensions": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-3.0.0.tgz", - "integrity": "sha512-X0RfwMgXPEesg6PCXzytQZt9Unh9gtc4SfeTNJvKifUL//Oegcc/Yf31z6hThNZ8dnD3Ir3wkHVN0eWrTvP5ww==", + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-3.1.0.tgz", + "integrity": "sha512-Jvvd9hy1w+xUad8+ckQsWA/V1AoyubOvqn0aygjMOVM4BfIaRav1NFS3LsTSDaV4n4FtcCtQXvzep1E6MboqwQ==", "engines": { "node": ">=18.20" }, @@ -72,11 +72,11 @@ } }, "node_modules/commander": { - "version": "13.1.0", - "resolved": "https://registry.npmjs.org/commander/-/commander-13.1.0.tgz", - "integrity": "sha512-/rFeCpNJQbhSZjGVwO9RFV3xPqbnERS8MmIQzCtD/zl6gpJuV/bMLuN92oG3F7d8oDEHHRrujSXNUr8fpjntKw==", + "version": "14.0.0", + "resolved": "https://registry.npmjs.org/commander/-/commander-14.0.0.tgz", + "integrity": "sha512-2uM9rYjPvyq39NwLRqaiLtWHyDC1FvryJDa2ATTVims5YAS4PupsEQsDvP14FqhFr0P49CYDugi59xaxJlTXRA==", "engines": { - "node": ">=18" + "node": ">=20" } }, "node_modules/common-path-prefix": { @@ -91,17 +91,17 @@ "dev": true }, "node_modules/ignore": { - "version": "7.0.3", - "resolved": "https://registry.npmjs.org/ignore/-/ignore-7.0.3.tgz", - "integrity": "sha512-bAH5jbK/F3T3Jls4I0SO1hmPR0dKU0a7+SY6n1yzRtG54FLO8d6w/nxLFX2Nb7dBu6cCWXPaAME6cYqFUMmuCA==", + "version": "7.0.5", + "resolved": "https://registry.npmjs.org/ignore/-/ignore-7.0.5.tgz", + "integrity": "sha512-Hs59xBNfUIunMFgWAbGX5cq6893IbWg4KnrjbYwX3tx0ztorVgTDA6B2sxf8ejHJ4wz8BqGUMYlnzNBer5NvGg==", "engines": { "node": ">= 4" } }, "node_modules/isbinaryfile": { - "version": 
"5.0.4", - "resolved": "https://registry.npmjs.org/isbinaryfile/-/isbinaryfile-5.0.4.tgz", - "integrity": "sha512-YKBKVkKhty7s8rxddb40oOkuP0NbaeXrQvLin6QMHL7Ypiy2RW9LwOVrVgZRyOrhQlayMd9t+D8yDy8MKFTSDQ==", + "version": "5.0.6", + "resolved": "https://registry.npmjs.org/isbinaryfile/-/isbinaryfile-5.0.6.tgz", + "integrity": "sha512-I+NmIfBHUl+r2wcDd6JwE9yWje/PIVY/R5/CmV8dXLZd5K+L9X2klAOwfAHNnondLXkbHyTAleQAWonpTJBTtw==", "engines": { "node": ">= 18.0.0" }, @@ -132,10 +132,11 @@ } }, "node_modules/typescript": { - "version": "5.7.3", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.7.3.tgz", - "integrity": "sha512-84MVSjMEHP+FQRPy3pX9sTVV/INIex71s9TL2Gm5FG/WG1SqXeKyZ0k7/blY/4FdOzI12CBy1vGc4og/eus0fw==", + "version": "5.9.2", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.2.tgz", + "integrity": "sha512-CWBzXQrc/qOkhidw1OzBTQuYRbfyxDXJMVJ1XNwUHGROVmuaeiEm3OslpZ1RV96d7SKKjZKrSJu3+t/xlw3R9A==", "dev": true, + "license": "Apache-2.0", "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -145,9 +146,9 @@ } }, "node_modules/undici-types": { - "version": "6.20.0", - "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.20.0.tgz", - "integrity": "sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg==", + "version": "7.10.0", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.10.0.tgz", + "integrity": "sha512-t5Fy/nfn+14LuOc2KNYg75vZqClpAiqscVvMygNnlsHBFpSXdJaYtXMcdNLpl/Qvc3P2cB3s6lOV51nqsFq4ag==", "dev": true } } diff --git a/package.json b/package.json index 14c75d4..babfd01 100644 --- a/package.json +++ b/package.json @@ -9,11 +9,11 @@ "linguist": "bin/index.js" }, "engines": { - "node": ">=20", + "node": ">=24", "npm": ">=10" }, "scripts": { - "download-files": "npx tsx@3 build/download-files", + "download-files": "node build/download-files", "pre-publish": "npm run download-files && npm test && npm run perf", "perf": "tsc && node test/perf", "test": 
"tsc && node test/folder && node test/unit" @@ -40,18 +40,18 @@ }, "homepage": "https://github.com/Nixinova/Linguist#readme", "dependencies": { - "binary-extensions": "^3.0.0", - "commander": "^13.1.0", + "binary-extensions": "^3.1.0", + "commander": "^14.0.0", "common-path-prefix": "^3.0.0", - "ignore": "^7.0.3", - "isbinaryfile": "^5.0.4", + "ignore": "^7.0.5", + "isbinaryfile": "^5.0.6", "js-yaml": "^4.1.0", "node-cache": "^5.1.2" }, "devDependencies": { "@types/js-yaml": "^4.0.9", - "@types/node": "ts5.7", + "@types/node": "^24.3.1", "deep-object-diff": "^1.1.9", - "typescript": "~5.7.3" + "typescript": "^5.9.2" } } diff --git a/src/helpers/load-data.ts b/src/helpers/load-data.ts index a8e4e89..420421b 100644 --- a/src/helpers/load-data.ts +++ b/src/helpers/load-data.ts @@ -1,8 +1,10 @@ import FS from 'node:fs'; import Path from 'node:path'; +import { fileURLToPath } from 'node:url'; import Cache from 'node-cache'; const cache = new Cache({}); +const dirname = Path.dirname(fileURLToPath(import.meta.url)); async function loadWebFile(file: string): Promise { // Return cache if it exists @@ -17,7 +19,7 @@ async function loadWebFile(file: string): Promise { } async function loadLocalFile(file: string): Promise { - const filePath = Path.resolve(__dirname, '../../ext', file); + const filePath = Path.resolve(dirname, "../../ext", file); return FS.promises.readFile(filePath).then(buffer => buffer.toString()); } From 46e7d9b2d3b3d392feba67efe1008549ea5200c8 Mon Sep 17 00:00:00 2001 From: Nixinova Date: Tue, 9 Sep 2025 20:31:05 +1200 Subject: [PATCH 11/15] Set up prettier --- .prettierrc.json | 9 +++++++++ package-lock.json | 17 +++++++++++++++++ package.json | 1 + 3 files changed, 27 insertions(+) create mode 100644 .prettierrc.json diff --git a/.prettierrc.json b/.prettierrc.json new file mode 100644 index 0000000..327d902 --- /dev/null +++ b/.prettierrc.json @@ -0,0 +1,9 @@ +{ + "singleQuote": true, + "useTabs": true, + "tabWidth": 4, + "semi": true, + 
"trailingComma": "es5", + "printWidth": 140, + "endOfLine": "crlf" +} diff --git a/package-lock.json b/package-lock.json index dd7f22e..98d6bb4 100644 --- a/package-lock.json +++ b/package-lock.json @@ -25,6 +25,7 @@ "@types/js-yaml": "^4.0.9", "@types/node": "^24.3.1", "deep-object-diff": "^1.1.9", + "prettier": "^3.6.2", "typescript": "^5.9.2" }, "engines": { @@ -131,6 +132,22 @@ "node": ">= 8.0.0" } }, + "node_modules/prettier": { + "version": "3.6.2", + "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.6.2.tgz", + "integrity": "sha512-I7AIg5boAr5R0FFtJ6rCfD+LFsWHp81dolrFD8S79U9tb8Az2nGrJncnMSnys+bpQJfRUzqs9hnA81OAA3hCuQ==", + "dev": true, + "license": "MIT", + "bin": { + "prettier": "bin/prettier.cjs" + }, + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/prettier/prettier?sponsor=1" + } + }, "node_modules/typescript": { "version": "5.9.2", "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.2.tgz", diff --git a/package.json b/package.json index babfd01..46d9ae2 100644 --- a/package.json +++ b/package.json @@ -52,6 +52,7 @@ "@types/js-yaml": "^4.0.9", "@types/node": "^24.3.1", "deep-object-diff": "^1.1.9", + "prettier": "^3.6.2", "typescript": "^5.9.2" } } From fa4ab8fff9936036a56d70fe715df3535fe66cb8 Mon Sep 17 00:00:00 2001 From: Nixinova Date: Tue, 9 Sep 2025 20:32:11 +1200 Subject: [PATCH 12/15] Split up CLI --- src/cli.ts | 142 +++----------------------------------- src/cli/output/default.ts | 113 ++++++++++++++++++++++++++++++ src/cli/output/tree.ts | 14 ++++ src/cli/runCliAnalysis.ts | 28 ++++++++ src/cli/utils.ts | 10 +++ 5 files changed, 173 insertions(+), 134 deletions(-) create mode 100644 src/cli/output/default.ts create mode 100644 src/cli/output/tree.ts create mode 100644 src/cli/runCliAnalysis.ts create mode 100644 src/cli/utils.ts diff --git a/src/cli.ts b/src/cli.ts index bd87f71..e2e3cc5 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -1,15 +1,10 @@ -import FS from 'node:fs'; -import Path 
from 'node:path'; import { program } from 'commander'; -import linguist from './index.js'; -import { normPath } from './helpers/norm-path.js'; +import FS from 'node:fs'; +import runCliAnalysis from './cli/runCliAnalysis.js'; -const packageJson = JSON.parse(FS.readFileSync(new URL('../package.json', import.meta.url), "utf-8")); +const packageJson = JSON.parse(FS.readFileSync(new URL('../package.json', import.meta.url), 'utf-8')); const VERSION = packageJson.version; -const colouredMsg = ([r, g, b]: number[], msg: string): string => `\u001B[${38};2;${r};${g};${b}m${msg}${'\u001b[0m'}`; -const hexToRgb = (hex: string): number[] => [parseInt(hex.slice(1, 3), 16), parseInt(hex.slice(3, 5), 16), parseInt(hex.slice(5, 7), 16)]; - program .name('linguist') .usage('--analyze [] []') @@ -37,7 +32,7 @@ program .option('-M|--checkModeline [bool]', 'Check modelines for explicit classification', true) .helpOption(`-h|--help`, 'Display this help message') - .version(VERSION, '-v|--version', 'Display the installed version of linguist-js') + .version(VERSION, '-v|--version', 'Display the installed version of linguist-js'); program.parse(process.argv); const args = program.opts(); @@ -49,136 +44,15 @@ for (const arg in args) { val = val.replace(/^=/, ''); if (val.match(/true$|false$/)) val = val === 'true'; return val; - } + }; if (Array.isArray(args[arg])) args[arg] = args[arg].map(normalise); else args[arg] = normalise(args[arg]); } // Run Linguist -if (args.analyze) (async () => { - // Check arguments - const validCategories = ['data', 'programming', 'prose', 'markup']; - if (args.categories?.some((category: string) => !validCategories.includes(category))) { - console.log(`Error: '${args.categories.join(', ')}' contains an invalid category. Valid options: ${validCategories.join(', ')}.`); - return; - } - - // Fetch language data - const root = args.analyze === true ? '.' 
: args.analyze; - const data = await linguist(root, args); - const { files, languages, unknown, repository } = data; - // Print output - if (!args.json) { - // Ignore languages with a bytes/% size less than the declared min size - if (args.minSize) { - const totalSize = languages.bytes; - const minSizeAmt = parseFloat(args.minSize.replace(/[a-z]+$/i, '')); // '2KB' -> 2 - const minSizeUnit = args.minSize.replace(/^\d+/, '').toLowerCase(); // '2KB' -> 'kb' - const checkBytes = minSizeUnit !== 'loc'; // whether to check bytes or loc - const conversionFactors: Record number> = { - 'b': n => n, - 'kb': n => n * 1e3, - 'mb': n => n * 1e6, - '%': n => n * totalSize / 100, - 'loc': n => n, - }; - const minBytesSize = conversionFactors[minSizeUnit](+minSizeAmt); - const other = { count: 0, bytes: 0, lines: { total: 0, content: 0, code: 0 } }; - // Apply specified minimums: delete language results that do not reach the threshold - for (const [lang, data] of Object.entries(languages.results)) { - const checkUnit = checkBytes ? data.bytes : data.lines.content; - if (checkUnit < minBytesSize) { - // Add to 'other' count - other.count++; - other.bytes += data.bytes; - other.lines.total += data.lines.total; - other.lines.content += data.lines.content; - // Remove language result - delete languages.results[lang]; - } - } - if (other.bytes) { - languages.results["Other"] = other; - } - } - - const sortedEntries = Object.entries(languages.results).sort((a, b) => (a[1].bytes < b[1].bytes ? 
+1 : -1)); - const totalBytes = languages.bytes; - console.log(`\n Analysed ${files.bytes.toLocaleString()} B from ${files.count} files with linguist-js`); - console.log(`\n Language analysis results: \n`); - let count = 0; - if (sortedEntries.length === 0) console.log(` None`); - - // Collate files per language - const filesPerLanguage: Record = {}; - if (args.listFiles) { - for (const language of Object.keys(languages.results)) { - filesPerLanguage[language] = []; - } - for (const [file, lang] of Object.entries(files.results)) { - if (lang) - filesPerLanguage[lang].push(file); - } - } - // List parsed results - for (const [lang, { bytes, lines }] of sortedEntries) { - const colour = hexToRgb(repository[lang].color ?? '#ededed'); - const percent = (bytes: number) => bytes / (totalBytes || 1) * 100; - const fmtd = { - index: (++count).toString().padStart(2, ' '), - lang: lang.padEnd(24, ' '), - percent: percent(bytes).toFixed(2).padStart(5, ' '), - bytes: bytes.toLocaleString().padStart(10, ' '), - loc: lines.content.toLocaleString().padStart(10, ' '), - icon: colouredMsg(colour, '\u2588'), - }; - console.log(` ${fmtd.index}. 
${fmtd.icon} ${fmtd.lang} ${fmtd.percent}% ${fmtd.bytes} B ${fmtd.loc} LOC`); - - // If using `listFiles` option, list all files tagged as this language - if (args.listFiles) { - console.log(); // padding - for (const file of filesPerLanguage[lang]) { - let relFile = normPath(Path.relative(Path.resolve('.'), file)); - if (!relFile.startsWith('../')) relFile = './' + relFile; - const bytes = (await FS.promises.stat(file)).size; - const fmtd2 = { - file: relFile.padEnd(42, ' '), - percent: percent(bytes).toFixed(2).padStart(5, ' '), - bytes: bytes.toLocaleString().padStart(10, ' '), - } - console.log(` ${fmtd.icon} ${fmtd2.file} ${fmtd2.percent}% ${fmtd2.bytes} B`); - } - console.log(); // padding - } - } - if (!args.listFiles) console.log(); // padding - console.log(` Total: ${totalBytes.toLocaleString()} B`); - // List unknown files/extensions - if (unknown.bytes > 0) { - console.log(`\n Unknown files and extensions:`); - for (const [name, bytes] of Object.entries(unknown.filenames)) { - console.log(` '${name}': ${bytes.toLocaleString()} B`); - } - for (const [ext, bytes] of Object.entries(unknown.extensions)) { - console.log(` '*${ext}': ${bytes.toLocaleString()} B`); - } - console.log(` Total: ${unknown.bytes.toLocaleString()} B`); - } - } - else if (args.tree) { - const treeParts: string[] = args.tree.split('.'); - let nestedData: Record = data; - for (const part of treeParts) { - if (!nestedData[part]) throw Error(`TraversalError: Key '${part}' cannot be found on output object.`); - nestedData = nestedData[part]; - } - console.log(nestedData); - } - else { - console.dir(data, { depth: null }); - } -})(); -else { +if (args.analyze) { + void runCliAnalysis(args); +} else { console.log(`Welcome to linguist-js, a JavaScript port of GitHub's language analyzer.`); console.log(`Type 'linguist --help' for a list of commands.`); } diff --git a/src/cli/output/default.ts b/src/cli/output/default.ts new file mode 100644 index 0000000..8168cc8 --- /dev/null +++ 
b/src/cli/output/default.ts @@ -0,0 +1,113 @@ +import { OptionValues } from 'commander'; +import FS from 'node:fs'; +import Path from 'node:path'; +import { normPath } from '../../helpers/norm-path.js'; +import { Results } from '../../types.js'; +import { colouredMsg, hexToRgb } from '../utils.js'; + +export default async function defaultOutput(args: OptionValues, data: Results) { + const { files, languages, unknown, repository } = data; + + if (args.minSize) { + // Ignore languages with a bytes/% size less than the declared min size + + const totalSize = languages.bytes; + const minSizeAmt = parseFloat(args.minSize.replace(/[a-z]+$/i, '')); // '2KB' -> 2 + const minSizeUnit = args.minSize.replace(/^\d+/, '').toLowerCase(); // '2KB' -> 'kb' + const checkBytes = minSizeUnit !== 'loc'; // whether to check bytes or loc + const conversionFactors: Record number> = { + ['b']: (n) => n, + ['kb']: (n) => n * 1e3, + ['mb']: (n) => n * 1e6, + ['%']: (n) => (n * totalSize) / 100, + ['loc']: (n) => n, + }; + const minBytesSize = conversionFactors[minSizeUnit](+minSizeAmt); + const other = { count: 0, bytes: 0, lines: { total: 0, content: 0, code: 0 } }; + // Apply specified minimums: delete language results that do not reach the threshold + for (const [lang, data] of Object.entries(languages.results)) { + const checkUnit = checkBytes ? data.bytes : data.lines.content; + if (checkUnit < minBytesSize) { + // Add to 'other' count + other.count++; + other.bytes += data.bytes; + other.lines.total += data.lines.total; + other.lines.content += data.lines.content; + // Remove language result + delete languages.results[lang]; + } + } + if (other.bytes) { + languages.results['Other'] = other; + } + } + + const sortedEntries = Object.entries(languages.results).sort((a, b) => (a[1].bytes < b[1].bytes ? 
+1 : -1)); + const totalBytes = languages.bytes; + console.log(`\n Analysed ${files.bytes.toLocaleString()} B from ${files.count} files with linguist-js`); + console.log(`\n Language analysis results: \n`); + let count = 0; + if (sortedEntries.length === 0) console.log(` None`); + + // Collate files per language + const filesPerLanguage: Record = {}; + if (args.listFiles) { + for (const language of Object.keys(languages.results)) { + filesPerLanguage[language] = []; + } + for (const [file, lang] of Object.entries(files.results)) { + if (lang) { + filesPerLanguage[lang].push(file); + } + } + } + // List parsed results + for (const [lang, { bytes, lines }] of sortedEntries) { + const colour = hexToRgb(repository[lang].color ?? '#ededed'); + const percent = (bytes: number) => (bytes / (totalBytes || 1)) * 100; + const fmtd = { + index: (++count).toString().padStart(2, ' '), + lang: lang.padEnd(24, ' '), + percent: percent(bytes).toFixed(2).padStart(5, ' '), + bytes: bytes.toLocaleString().padStart(10, ' '), + loc: lines.content.toLocaleString().padStart(10, ' '), + icon: colouredMsg(colour, '\u2588'), + }; + console.log(` ${fmtd.index}. 
${fmtd.icon} ${fmtd.lang} ${fmtd.percent}% ${fmtd.bytes} B ${fmtd.loc} LOC`); + + // If using `listFiles` option, list all files tagged as this language + if (args.listFiles) { + console.log(); // padding + for (const file of filesPerLanguage[lang]) { + let relFile = normPath(Path.relative(Path.resolve('.'), file)); + if (!relFile.startsWith('../')) { + relFile = './' + relFile; + } + const fileStat = await FS.promises.stat(file); + const bytes = fileStat.size; + const fmtd2 = { + file: relFile.padEnd(42, ' '), + percent: percent(bytes).toFixed(2).padStart(5, ' '), + bytes: bytes.toLocaleString().padStart(10, ' '), + }; + console.log(` ${fmtd.icon} ${fmtd2.file} ${fmtd2.percent}% ${fmtd2.bytes} B`); + } + console.log(); // padding + } + } + if (!args.listFiles) { + console.log(); // padding + } + console.log(` Total: ${totalBytes.toLocaleString()} B`); + // List unknown files/extensions + if (unknown.bytes > 0) { + console.log(`\n Unknown files and extensions:`); + for (const [name, bytes] of Object.entries(unknown.filenames)) { + console.log(` '${name}': ${bytes.toLocaleString()} B`); + } + for (const [ext, bytes] of Object.entries(unknown.extensions)) { + console.log(` '*${ext}': ${bytes.toLocaleString()} B`); + } + console.log(` Total: ${unknown.bytes.toLocaleString()} B`); + } +} diff --git a/src/cli/output/tree.ts b/src/cli/output/tree.ts new file mode 100644 index 0000000..ee61ba4 --- /dev/null +++ b/src/cli/output/tree.ts @@ -0,0 +1,14 @@ +import { OptionValues } from 'commander'; +import { Results } from '../../types.js'; + +export default function treeOutput(args: OptionValues, data: Results) { + const treeParts: string[] = args.tree.split('.'); + let nestedData: Record = data; + for (const part of treeParts) { + if (!nestedData[part]) { + throw Error(`TraversalError: Key '${part}' cannot be found on output object.`); + } + nestedData = nestedData[part]; + } + console.log(nestedData); +} diff --git a/src/cli/runCliAnalysis.ts b/src/cli/runCliAnalysis.ts 
new file mode 100644 index 0000000..f6bbca1 --- /dev/null +++ b/src/cli/runCliAnalysis.ts @@ -0,0 +1,28 @@ +import { OptionValues } from 'commander'; +import linguist from '../index.js'; +import defaultOutput from './output/default.js'; +import treeOutput from './output/tree.js'; + +const validCategories = ['data', 'programming', 'prose', 'markup']; + +export default async function runCliAnalysis(args: OptionValues) { + // Check arguments + if (args.categories?.some((category: string) => !validCategories.includes(category))) { + console.log(`Error: '${args.categories.join(', ')}' contains an invalid category.`); + console.log(`\tValid options: ${validCategories.join(', ')}.`); + return; + } + + // Fetch language data + const root = args.analyze === true ? '.' : args.analyze; + const data = await linguist(root, args); + + // Print output + if (!args.json) { + defaultOutput(args, data); + } else if (args.tree) { + treeOutput(args, data); + } else { + console.dir(data, { depth: null }); + } +} diff --git a/src/cli/utils.ts b/src/cli/utils.ts new file mode 100644 index 0000000..6a2e80f --- /dev/null +++ b/src/cli/utils.ts @@ -0,0 +1,10 @@ +export function colouredMsg([r, g, b]: [number, number, number], msg: string): string { + return `\u001B[${38};2;${r};${g};${b}m${msg}\u001b[0m`; +} + +export function hexToRgb(hex: string): [number, number, number] { + const r = parseInt(hex.slice(1, 3), 16); + const g = parseInt(hex.slice(3, 5), 16); + const b = parseInt(hex.slice(5, 7), 16); + return [r, g, b]; +} From 848b9e7c82a3cf81a5e9c308d5c2a7186308e4bf Mon Sep 17 00:00:00 2001 From: Nixinova Date: Tue, 9 Sep 2025 20:41:40 +1200 Subject: [PATCH 13/15] Reorganise source files --- build/download-files.ts | 5 +- src/cli/output/default.ts | 4 +- src/cli/output/tree.ts | 2 +- src/index.ts | 225 +++++++++--------- .../load-data.ts => program/data/loadData.ts} | 12 +- .../fs/normalisedPath.ts} | 0 .../read-file.ts => program/fs/readFile.ts} | 0 .../walk-tree.ts => 
program/fs/walkTree.ts} | 39 ++- .../parsing/parseGItattributes.ts} | 6 +- .../parsing/parseGitignore.ts} | 0 .../convert-pcre.ts => program/utils/pcre.ts} | 0 src/{ => types}/schema.ts | 0 src/{ => types}/types.ts | 0 13 files changed, 144 insertions(+), 149 deletions(-) rename src/{helpers/load-data.ts => program/data/loadData.ts} (75%) rename src/{helpers/norm-path.ts => program/fs/normalisedPath.ts} (100%) rename src/{helpers/read-file.ts => program/fs/readFile.ts} (100%) rename src/{helpers/walk-tree.ts => program/fs/walkTree.ts} (79%) rename src/{helpers/parse-gitattributes.ts => program/parsing/parseGItattributes.ts} (89%) rename src/{helpers/parse-gitignore.ts => program/parsing/parseGitignore.ts} (100%) rename src/{helpers/convert-pcre.ts => program/utils/pcre.ts} (100%) rename src/{ => types}/schema.ts (100%) rename src/{ => types}/types.ts (100%) diff --git a/build/download-files.ts b/build/download-files.ts index a4008d6..e155758 100644 --- a/build/download-files.ts +++ b/build/download-files.ts @@ -1,10 +1,9 @@ #!/usr/bin/env tsx import FS from 'fs'; -import Path from 'path'; import YAML from 'js-yaml'; - -import loadFile, { parseGeneratedDataFile } from '../src/helpers/load-data.ts'; +import Path from 'path'; +import loadFile, { parseGeneratedDataFile } from '../src/program/data/loadData.ts'; async function writeFile(filename: string) { const filePath = Path.resolve('ext', filename); diff --git a/src/cli/output/default.ts b/src/cli/output/default.ts index 8168cc8..bb50a7b 100644 --- a/src/cli/output/default.ts +++ b/src/cli/output/default.ts @@ -1,8 +1,8 @@ import { OptionValues } from 'commander'; import FS from 'node:fs'; import Path from 'node:path'; -import { normPath } from '../../helpers/norm-path.js'; -import { Results } from '../../types.js'; +import { normPath } from '../../program/fs/normalisedPath.js'; +import { Results } from '../../types/types.js'; import { colouredMsg, hexToRgb } from '../utils.js'; export default async function 
defaultOutput(args: OptionValues, data: Results) { diff --git a/src/cli/output/tree.ts b/src/cli/output/tree.ts index ee61ba4..da61761 100644 --- a/src/cli/output/tree.ts +++ b/src/cli/output/tree.ts @@ -1,5 +1,5 @@ import { OptionValues } from 'commander'; -import { Results } from '../../types.js'; +import { Results } from '../../types/types.js'; export default function treeOutput(args: OptionValues, data: Results) { const treeParts: string[] = args.tree.split('.'); diff --git a/src/index.ts b/src/index.ts index dc8fd8e..20678ff 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,26 +1,25 @@ -import FS from 'node:fs'; -import Path from 'node:path'; -import YAML from 'js-yaml'; -import ignore, { Ignore } from 'ignore'; import commonPrefix from 'common-path-prefix'; +import ignore, { Ignore } from 'ignore'; import { isBinaryFile } from 'isbinaryfile'; - -import walk from './helpers/walk-tree.js'; -import loadFile, { parseGeneratedDataFile } from './helpers/load-data.js'; -import readFileChunk from './helpers/read-file.js'; -import parseAttributes, { FlagAttributes } from './helpers/parse-gitattributes.js'; -import pcre from './helpers/convert-pcre.js'; -import { normPath } from './helpers/norm-path.js'; -import * as T from './types.js'; -import * as S from './schema.js'; +import YAML from 'js-yaml'; +import FS from 'node:fs'; +import Path from 'node:path'; +import loadFile, { parseGeneratedDataFile } from './program/data/loadData.js'; +import { normPath } from './program/fs/normalisedPath.js'; +import readFileChunk from './program/fs/readFile.js'; +import walkTree from './program/fs/walkTree.js'; +import parseGitattributes, { FlagAttributes } from './program/parsing/parseGitattributes.js'; +import pcre from './program/utils/pcre.js'; +import * as S from './types/schema.js'; +import * as T from './types/types.js'; const binaryData = JSON.parse( - FS.readFileSync(new URL('../node_modules/binary-extensions/binary-extensions.json', import.meta.url), "utf-8") + 
FS.readFileSync(new URL('../node_modules/binary-extensions/binary-extensions.json', import.meta.url), 'utf-8') ) as string[]; -async function analyse(path?: string, opts?: T.Options): Promise -async function analyse(paths?: string[], opts?: T.Options): Promise -async function analyse(content?: Record, opts?: T.Options): Promise +async function analyse(path?: string, opts?: T.Options): Promise; +async function analyse(paths?: string[], opts?: T.Options): Promise; +async function analyse(content?: Record, opts?: T.Options): Promise; async function analyse(rawInput?: string | string[] | Record, opts: T.Options = {}): Promise { const inputs = { path: typeof rawInput === 'string' ? rawInput : null, @@ -31,7 +30,7 @@ async function analyse(rawInput?: string | string[] | Record, op const inputContent = inputs.content; const useRawContent = inputContent !== null; - const input = useRawContent ? Object.keys(inputContent) : inputPaths ?? []; + const input = useRawContent ? Object.keys(inputContent) : (inputPaths ?? []); // Normalise input option arguments opts = { @@ -65,10 +64,10 @@ async function analyse(rawInput?: string | string[] | Record, op }; // Set a common root path so that vendor paths do not incorrectly match parent folders - const resolvedInput = input.map(path => normPath(Path.resolve(path))); + const resolvedInput = input.map((path) => normPath(Path.resolve(path))); const commonRoot = (input.length > 1 ? commonPrefix(resolvedInput) : resolvedInput[0]).replace(/\/?$/, ''); - const relPath = (file: T.AbsFile): T.RelFile => useRawContent ? file : normPath(Path.relative(commonRoot, file)); - const unRelPath = (file: T.RelFile): T.AbsFile => useRawContent ? file : normPath(Path.resolve(commonRoot, file)); + const relPath = (file: T.AbsFile): T.RelFile => (useRawContent ? file : normPath(Path.relative(commonRoot, file))); + const unRelPath = (file: T.RelFile): T.AbsFile => (useRawContent ? 
file : normPath(Path.resolve(commonRoot, file))); // Other helper functions const fileMatchesGlobs = (file: T.AbsFile, ...globs: T.FileGlob[]) => ignore().add(globs).ignores(relPath(file)); @@ -80,24 +79,25 @@ async function analyse(rawInput?: string | string[] | Record, op const ignored = ignore(); ignored.add('.git/'); ignored.add(opts.ignoredFiles ?? []); - const regexIgnores: RegExp[] = opts.keepVendored ? [] : vendorPaths.map(path => RegExp(path, 'i')); + const regexIgnores: RegExp[] = opts.keepVendored ? [] : vendorPaths.map((path) => RegExp(path, 'i')); // Load file paths and folders let files: T.AbsFile[]; if (useRawContent) { // Uses raw file content files = input; - } - else { + } else { // Uses directory on disc - const data = walk({ init: true, commonRoot, folderRoots: resolvedInput, folders: resolvedInput, ignored }); + const data = walkTree({ init: true, commonRoot, folderRoots: resolvedInput, folders: resolvedInput, ignored }); files = data.files; } // Fetch and normalise gitattributes data of all subfolders and save to metadata const manualAttributes: Record = {}; // Maps file globs to gitattribute boolean flags const getFlaggedGlobs = (attr: keyof FlagAttributes, val: boolean) => { - return Object.entries(manualAttributes).filter(([, attrs]) => attrs[attr] === val).map(([glob,]) => glob) + return Object.entries(manualAttributes) + .filter(([, attrs]) => attrs[attr] === val) + .map(([glob]) => glob); }; const findAttrsForPath = (filePath: string): FlagAttributes | null => { const resultAttrs: Record = {}; @@ -114,14 +114,14 @@ async function analyse(rawInput?: string | string[] | Record, op return null; } return resultAttrs as FlagAttributes; - } + }; if (!useRawContent && opts.checkAttributes) { - const nestedAttrFiles = files.filter(file => file.endsWith('.gitattributes')); + const nestedAttrFiles = files.filter((file) => file.endsWith('.gitattributes')); for (const attrFile of nestedAttrFiles) { const relAttrFile = relPath(attrFile); const 
relAttrFolder = Path.dirname(relAttrFile); const contents = await readFileChunk(attrFile); - const parsed = parseAttributes(contents, relAttrFolder); + const parsed = parseGitattributes(contents, relAttrFolder); for (const { glob, attrs } of parsed) { manualAttributes[glob] = attrs; } @@ -133,7 +133,7 @@ async function analyse(rawInput?: string | string[] | Record, op for (const file of files) { const relFile = relPath(file); - const isRegexIgnored = regexIgnores.some(pattern => pattern.test(relFile)); + const isRegexIgnored = regexIgnores.some((pattern) => pattern.test(relFile)); if (!isRegexIgnored) { // Checking overrides is moot if file is not even marked as ignored by default continue; @@ -147,30 +147,38 @@ async function analyse(rawInput?: string | string[] | Record, op filesToIgnore.push(file); } } - files = files.filter(file => !filesToIgnore.includes(file)); + files = files.filter((file) => !filesToIgnore.includes(file)); // Apply vendor file path matches and filter out vendored files if (!opts.keepVendored) { // Get data of files that have been manually marked with metadata - const vendorTrueGlobs = [...getFlaggedGlobs('vendored', true), ...getFlaggedGlobs('generated', true), ...getFlaggedGlobs('documentation', true)]; - const vendorFalseGlobs = [...getFlaggedGlobs('vendored', false), ...getFlaggedGlobs('generated', false), ...getFlaggedGlobs('documentation', false)]; + const vendorTrueGlobs = [ + ...getFlaggedGlobs('vendored', true), + ...getFlaggedGlobs('generated', true), + ...getFlaggedGlobs('documentation', true), + ]; + const vendorFalseGlobs = [ + ...getFlaggedGlobs('vendored', false), + ...getFlaggedGlobs('generated', false), + ...getFlaggedGlobs('documentation', false), + ]; // Set up glob ignore object to use for expanding globs to match files const vendorTrueIgnore = ignore().add(vendorTrueGlobs); const vendorFalseIgnore = ignore().add(vendorFalseGlobs); // Remove all files marked as vendored by default - const excludedFiles = files.filter(file 
=> vendorPaths.some(pathPtn => RegExp(pathPtn, 'i').test(relPath(file)))); - files = files.filter(file => !excludedFiles.includes(file)); + const excludedFiles = files.filter((file) => vendorPaths.some((pathPtn) => RegExp(pathPtn, 'i').test(relPath(file)))); + files = files.filter((file) => !excludedFiles.includes(file)); // Re-add removed files that are overridden manually in gitattributes - const overriddenExcludedFiles = excludedFiles.filter(file => vendorFalseIgnore.ignores(relPath(file))); + const overriddenExcludedFiles = excludedFiles.filter((file) => vendorFalseIgnore.ignores(relPath(file))); files.push(...overriddenExcludedFiles); // Remove files explicitly marked as vendored in gitattributes - files = files.filter(file => !vendorTrueIgnore.ignores(relPath(file))); + files = files.filter((file) => !vendorTrueIgnore.ignores(relPath(file))); } // Filter out binary files if (!opts.keepBinary) { // Filter out files that are binary by default - files = files.filter(file => !binaryData.some(ext => file.endsWith('.' + ext))); + files = files.filter((file) => !binaryData.some((ext) => file.endsWith('.' 
+ ext))); // Filter out manually specified binary files const binaryIgnored = ignore().add(getFlaggedGlobs('binary', true)); files = filterOutIgnored(files, binaryIgnored); @@ -198,7 +206,7 @@ async function analyse(rawInput?: string | string[] | Record, op // If specified language is an alias, associate it with its full name if (!langData[forcedLang]) { - const overrideLang = Object.entries(langData).find(entry => entry[1].aliases?.includes(forcedLang!.toLowerCase())); + const overrideLang = Object.entries(langData).find((entry) => entry[1].aliases?.includes(forcedLang!.toLowerCase())); if (overrideLang) { forcedLang = overrideLang[0]; } @@ -215,7 +223,7 @@ async function analyse(rawInput?: string | string[] | Record, op } // Set parent to result group if it is present // Is nullish if either `opts.childLanguages` is set or if there is no group - const finalResult = !opts.childLanguages && result && langData[result] && langData[result].group || result; + const finalResult = (!opts.childLanguages && result && langData[result] && langData[result].group) || result; if (!fileAssociations[file].includes(finalResult)) { fileAssociations[file].push(finalResult); } @@ -225,8 +233,7 @@ async function analyse(rawInput?: string | string[] | Record, op const definiteness: Record = {}; const fromShebang: Record = {}; - fileLoop: - for (const file of files) { + fileLoop: for (const file of files) { // Check manual override for (const globMatch in globOverrides) { if (!fileMatchesGlobs(file, globMatch)) continue; @@ -242,11 +249,9 @@ async function analyse(rawInput?: string | string[] | Record, op let firstLine: string | null; if (useRawContent) { firstLine = inputContent[file]?.split('\n')[0] ?? 
null; - } - else if (FS.existsSync(file) && !FS.lstatSync(file).isDirectory()) { + } else if (FS.existsSync(file) && !FS.lstatSync(file).isDirectory()) { firstLine = await readFileChunk(file, true).catch(() => null); - } - else continue; + } else continue; // Skip if file is unreadable or blank if (firstLine === null) continue; @@ -261,25 +266,21 @@ async function analyse(rawInput?: string | string[] | Record, op const langMatcher = (lang: string) => `\\b${lang.toLowerCase().replace(/\W/g, '\\$&')}(?![\\w#+*]|-\*-)`; // Check for interpreter match if (opts.checkShebang && hasShebang) { - const matchesInterpretor = data.interpreters?.some(interpreter => firstLine.match(`\\b${interpreter}\\b`)); - if (matchesInterpretor) - matches.push(lang); + const matchesInterpretor = data.interpreters?.some((interpreter) => firstLine.match(`\\b${interpreter}\\b`)); + if (matchesInterpretor) matches.push(lang); } // Check modeline declaration if (opts.checkModeline && hasModeline) { const modelineText = firstLine.toLowerCase().split(modelineRegex)[1]; const matchesLang = modelineText.match(langMatcher(lang)); - const matchesAlias = data.aliases?.some(lang => modelineText.match(langMatcher(lang))); - if (matchesLang || matchesAlias) - matches.push(lang); + const matchesAlias = data.aliases?.some((lang) => modelineText.match(langMatcher(lang))); + if (matchesLang || matchesAlias) matches.push(lang); } } // Add identified language(s) if (matches.length) { - for (const match of matches) - addResult(file, match); - if (matches.length === 1) - definiteness[file] = true; + for (const match of matches) addResult(file, match); + if (matches.length === 1) definiteness[file] = true; fromShebang[file] = true; continue; } @@ -288,24 +289,24 @@ async function analyse(rawInput?: string | string[] | Record, op let skipExts = false; // Check if filename is a match for (const lang in langData) { - const matchesName = langData[lang].filenames?.some(name => Path.basename(file.toLowerCase()) === 
name.toLowerCase()); + const matchesName = langData[lang].filenames?.some((name) => Path.basename(file.toLowerCase()) === name.toLowerCase()); if (matchesName) { addResult(file, lang); skipExts = true; } } // Check if extension is a match - const possibleExts: { ext: string, lang: T.Language }[] = []; - if (!skipExts) for (const lang in langData) { - const extMatches = langData[lang].extensions?.filter(ext => file.toLowerCase().endsWith(ext.toLowerCase())); - if (extMatches?.length) { - for (const ext of extMatches) - possibleExts.push({ ext, lang }); + const possibleExts: { ext: string; lang: T.Language }[] = []; + if (!skipExts) + for (const lang in langData) { + const extMatches = langData[lang].extensions?.filter((ext) => file.toLowerCase().endsWith(ext.toLowerCase())); + if (extMatches?.length) { + for (const ext of extMatches) possibleExts.push({ ext, lang }); + } } - } // Apply more specific extension if available const isComplexExt = (ext: string) => /\..+\./.test(ext); - const hasComplexExt = possibleExts.some(data => isComplexExt(data.ext)); + const hasComplexExt = possibleExts.some((data) => isComplexExt(data.ext)); for (const { ext, lang } of possibleExts) { if (hasComplexExt && !isComplexExt(ext)) continue; if (!hasComplexExt && isComplexExt(ext)) continue; @@ -330,80 +331,75 @@ async function analyse(rawInput?: string | string[] | Record, op } // Parse heuristics if applicable - if (opts.checkHeuristics) for (const heuristics of heuristicsData.disambiguations) { - // Make sure the extension matches the current file - if (!fromShebang[file] && !heuristics.extensions.includes(extensions[file])) - continue; - // Load heuristic rules - for (const heuristic of heuristics.rules) { - // Make sure the language is not an array - if (Array.isArray(heuristic.language)) { - heuristic.language = heuristic.language[0]; - } + if (opts.checkHeuristics) + for (const heuristics of heuristicsData.disambiguations) { + // Make sure the extension matches the current file + 
if (!fromShebang[file] && !heuristics.extensions.includes(extensions[file])) continue; + // Load heuristic rules + for (const heuristic of heuristics.rules) { + // Make sure the language is not an array + if (Array.isArray(heuristic.language)) { + heuristic.language = heuristic.language[0]; + } - // Make sure the results includes this language - const languageGroup = langData[heuristic.language]?.group; - const matchesLang = fileAssociations[file].includes(heuristic.language); - const matchesParent = languageGroup && fileAssociations[file].includes(languageGroup); - if (!matchesLang && !matchesParent) - continue; - - // Normalise heuristic data - const patterns: string[] = []; - const normalise = (contents: string | string[]) => patterns.push(...[contents].flat()); - if (heuristic.pattern) normalise(heuristic.pattern); - if (heuristic.named_pattern) normalise(heuristicsData.named_patterns[heuristic.named_pattern]); - if (heuristic.and) { - for (const data of heuristic.and) { - if (data.pattern) normalise(data.pattern); - if (data.named_pattern) normalise(heuristicsData.named_patterns[data.named_pattern]); + // Make sure the results includes this language + const languageGroup = langData[heuristic.language]?.group; + const matchesLang = fileAssociations[file].includes(heuristic.language); + const matchesParent = languageGroup && fileAssociations[file].includes(languageGroup); + if (!matchesLang && !matchesParent) continue; + + // Normalise heuristic data + const patterns: string[] = []; + const normalise = (contents: string | string[]) => patterns.push(...[contents].flat()); + if (heuristic.pattern) normalise(heuristic.pattern); + if (heuristic.named_pattern) normalise(heuristicsData.named_patterns[heuristic.named_pattern]); + if (heuristic.and) { + for (const data of heuristic.and) { + if (data.pattern) normalise(data.pattern); + if (data.named_pattern) normalise(heuristicsData.named_patterns[data.named_pattern]); + } } - } - // Check file contents and apply 
heuristic patterns - const fileContent = useRawContent ? inputContent[file] : await readFileChunk(file).catch(() => null); + // Check file contents and apply heuristic patterns + const fileContent = useRawContent ? inputContent[file] : await readFileChunk(file).catch(() => null); - // Skip if file read errors - if (fileContent === null) continue; + // Skip if file read errors + if (fileContent === null) continue; - // Apply heuristics - if (!patterns.length || patterns.some(pattern => pcre(pattern).test(fileContent))) { - results.files.results[file] = heuristic.language; - break; + // Apply heuristics + if (!patterns.length || patterns.some((pattern) => pcre(pattern).test(fileContent))) { + results.files.results[file] = heuristic.language; + break; + } } } - } // If no heuristics, assign a language if (!results.files.results[file]) { const possibleLangs = fileAssociations[file]; // Assign first language as a default option const defaultLang = possibleLangs[0]; - const alternativeLangs = possibleLangs.slice(1) + const alternativeLangs = possibleLangs.slice(1); results.files.results[file] = defaultLang; // List alternative languages if there are any - if (alternativeLangs.length > 0) - results.files.alternatives[file] = alternativeLangs; + if (alternativeLangs.length > 0) results.files.alternatives[file] = alternativeLangs; } } // Skip specified categories if (opts.categories?.length) { const categories: T.Category[] = ['data', 'markup', 'programming', 'prose']; - const hiddenCategories = categories.filter(cat => !opts.categories!.includes(cat)); + const hiddenCategories = categories.filter((cat) => !opts.categories!.includes(cat)); for (const [file, lang] of Object.entries(results.files.results)) { // Skip if language is not hidden - if (!hiddenCategories.some(cat => lang && langData[lang]?.type === cat)) - continue; + if (!hiddenCategories.some((cat) => lang && langData[lang]?.type === cat)) continue; // Skip if language is forced as detectable if 
(opts.checkDetected) { const detectable = ignore().add(getFlaggedGlobs('detectable', true)); - if (detectable.ignores(relPath(file))) - continue; + if (detectable.ignores(relPath(file))) continue; } // Delete result otherwise delete results.files.results[file]; - if (lang) - delete results.languages.results[lang]; + if (lang) delete results.languages.results[lang]; } for (const category of hiddenCategories) { for (const [lang, { type }] of Object.entries(results.repository)) { @@ -438,7 +434,7 @@ async function analyse(rawInput?: string | string[] | Record, op const fileContent = useRawContent ? inputContent[file] : FS.readFileSync(file).toString(); const allLines = fileContent.split(/\r?\n/gm); loc.total = allLines.length; - loc.content = allLines.filter(line => line.trim().length > 0).length; + loc.content = allLines.filter((line) => line.trim().length > 0).length; } // Apply to files totals results.files.bytes += fileSize; @@ -464,8 +460,7 @@ async function analyse(rawInput?: string | string[] | Record, op results.languages.results[lang].lines.content += loc.content; results.languages.lines.total += loc.total; results.languages.lines.content += loc.content; - } - else { + } else { const ext = Path.extname(file); const unknownType = ext ? 
'extensions' : 'filenames'; const name = ext || Path.basename(file); @@ -480,7 +475,7 @@ async function analyse(rawInput?: string | string[] | Record, op // Set lines output to NaN when line calculation is disabled if (opts.calculateLines === false) { - results.files.lines = { total: NaN, content: NaN } + results.files.lines = { total: NaN, content: NaN }; } // Set counts diff --git a/src/helpers/load-data.ts b/src/program/data/loadData.ts similarity index 75% rename from src/helpers/load-data.ts rename to src/program/data/loadData.ts index 420421b..1f3f4f0 100644 --- a/src/helpers/load-data.ts +++ b/src/program/data/loadData.ts @@ -1,7 +1,7 @@ +import Cache from 'node-cache'; import FS from 'node:fs'; import Path from 'node:path'; import { fileURLToPath } from 'node:url'; -import Cache from 'node-cache'; const cache = new Cache({}); const dirname = Path.dirname(fileURLToPath(import.meta.url)); @@ -13,19 +13,21 @@ async function loadWebFile(file: string): Promise { // Otherwise cache the request const dataUrl = (file: string): string => `https://raw.githubusercontent.com/github/linguist/HEAD/lib/linguist/${file}`; // Load file content, falling back to the local file if the request fails - const fileContent = await fetch(dataUrl(file)).then(data => data.text()).catch(async () => await loadLocalFile(file)); + const fileContent = await fetch(dataUrl(file)) + .then((data) => data.text()) + .catch(async () => await loadLocalFile(file)); cache.set(file, fileContent); return fileContent; } async function loadLocalFile(file: string): Promise { - const filePath = Path.resolve(dirname, "../../ext", file); - return FS.promises.readFile(filePath).then(buffer => buffer.toString()); + const filePath = Path.resolve(dirname, '../../ext', file); + return FS.promises.readFile(filePath).then((buffer) => buffer.toString()); } /** Nukes unused `generated.rb` file content. 
*/ export function parseGeneratedDataFile(fileContent: string): string[] { - return [...fileContent.match(/(?<=name\.match\(\/).+?(?=(?; let allFolders: Set; interface WalkInput { /** Whether this is walking the tree from the root */ - init: boolean, + init: boolean; /** The common root absolute path of all folders being checked */ - commonRoot: string, + commonRoot: string; /** The absolute path that each folder is relative to */ - folderRoots: string[], + folderRoots: string[]; /** The absolute path of folders being checked */ - folders: string[], + folders: string[]; /** An instantiated Ignore object listing ignored files */ - ignored: Ignore, -}; + ignored: Ignore; +} interface WalkOutput { - files: string[], - folders: string[], -}; + files: string[]; + folders: string[]; +} /** Generate list of files in a directory. */ -export default function walk(data: WalkInput): WalkOutput { +export default function walkTree(data: WalkInput): WalkOutput { const { init, commonRoot, folderRoots, folders, ignored } = data; // Initialise files and folders lists @@ -41,7 +41,7 @@ export default function walk(data: WalkInput): WalkOutput { const localRoot = folderRoots[0].replace(commonRoot, '').replace(/^\//, ''); // Get list of files and folders inside this folder - const files = FS.readdirSync(folder).map(file => { + const files = FS.readdirSync(folder).map((file) => { // Create path relative to root const base = normAbsPath(folder, file).replace(commonRoot, '.'); // Add trailing slash to mark directories @@ -54,7 +54,7 @@ export default function walk(data: WalkInput): WalkOutput { if (FS.existsSync(gitignoreFilename)) { const gitignoreContents = FS.readFileSync(gitignoreFilename, 'utf-8'); const ignoredPaths = parseGitignore(gitignoreContents); - const rootRelIgnoredPaths = ignoredPaths.map(ignorePath => + const rootRelIgnoredPaths = ignoredPaths.map((ignorePath) => // get absolute path of the ignore glob normPath(folder, ignorePath) // convert abs ignore glob to be 
relative to the root folder @@ -88,9 +88,8 @@ export default function walk(data: WalkInput): WalkOutput { if (file.endsWith('/')) { // Recurse into subfolders allFolders.add(path); - walk({ init: false, commonRoot, folderRoots, folders: [path], ignored }); - } - else { + walkTree({ init: false, commonRoot, folderRoots, folders: [path], ignored }); + } else { // Add file path to list allFiles.add(path); } @@ -99,12 +98,12 @@ export default function walk(data: WalkInput): WalkOutput { // Recurse into all folders else { for (const i in folders) { - walk({ init: false, commonRoot, folderRoots: [folderRoots[i]], folders: [folders[i]], ignored }); + walkTree({ init: false, commonRoot, folderRoots: [folderRoots[i]], folders: [folders[i]], ignored }); } } // Return absolute files and folders lists return { - files: [...allFiles].map(file => file.replace(/^\./, commonRoot)), + files: [...allFiles].map((file) => file.replace(/^\./, commonRoot)), folders: [...allFolders], }; } diff --git a/src/helpers/parse-gitattributes.ts b/src/program/parsing/parseGItattributes.ts similarity index 89% rename from src/helpers/parse-gitattributes.ts rename to src/program/parsing/parseGItattributes.ts index 06655b0..138c334 100644 --- a/src/helpers/parse-gitattributes.ts +++ b/src/program/parsing/parseGItattributes.ts @@ -1,5 +1,5 @@ -import * as T from '../types.js'; -import { normPath } from './norm-path.js'; +import * as T from '../../types/types.js'; +import { normPath } from '../fs/normalisedPath.js'; export type FlagAttributes = { 'vendored': boolean | null, @@ -18,7 +18,7 @@ export type ParsedGitattributes = Array<{ /** * Parses a gitattributes file. 
*/ -export default function parseAttributes(content: string, folderRoot: string = '.'): ParsedGitattributes { +export default function parseGitattributes(content: string, folderRoot: string = '.'): ParsedGitattributes { const output: ParsedGitattributes = []; for (const rawLine of content.split('\n')) { diff --git a/src/helpers/parse-gitignore.ts b/src/program/parsing/parseGitignore.ts similarity index 100% rename from src/helpers/parse-gitignore.ts rename to src/program/parsing/parseGitignore.ts diff --git a/src/helpers/convert-pcre.ts b/src/program/utils/pcre.ts similarity index 100% rename from src/helpers/convert-pcre.ts rename to src/program/utils/pcre.ts diff --git a/src/schema.ts b/src/types/schema.ts similarity index 100% rename from src/schema.ts rename to src/types/schema.ts diff --git a/src/types.ts b/src/types/types.ts similarity index 100% rename from src/types.ts rename to src/types/types.ts From 26162d002b441fe6911eba9788f1bffab4240ba9 Mon Sep 17 00:00:00 2001 From: Nixinova Date: Tue, 9 Sep 2025 20:48:11 +1200 Subject: [PATCH 14/15] Only log failures in tests --- test/folder.js | 5 ++--- test/unit.js | 9 +++------ 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/test/folder.js b/test/folder.js index 36919eb..da19c71 100644 --- a/test/folder.js +++ b/test/folder.js @@ -1,11 +1,11 @@ +import { updatedDiff } from 'deep-object-diff'; import FS from 'node:fs'; import { dirname } from 'node:path'; import { fileURLToPath } from 'node:url'; -import { updatedDiff } from 'deep-object-diff'; import linguist from '../dist/index.js'; async function testFolder() { - console.info('-'.repeat(11) + '\nFolder test\n' + '-'.repeat(11)); + console.info('-'.repeat(11) + ' Folder test ' + '-'.repeat(11)); const curFolder = dirname(fileURLToPath(import.meta.url)); const samplesFolder = curFolder.replace(/\\/g, '/') + '/samples'; const expectedJson = FS.readFileSync(curFolder + '/expected.json', { encoding: 'utf8' }); @@ -13,7 +13,6 @@ async function 
testFolder() { const actual = await linguist(samplesFolder); const diff = updatedDiff(expected, actual); - console.dir(actual, { depth: null }); if (JSON.stringify(diff) === '{}') { console.info('Results match expected'); } diff --git a/test/unit.js b/test/unit.js index c5fc34b..aa905a5 100644 --- a/test/unit.js +++ b/test/unit.js @@ -4,7 +4,7 @@ let i = 0; let errors = 0; function desc(text) { - console.info(`Testing: ${text}`); + console.info(` Testing: ${text}`); } async function test([filename, fileContent = ''], [type, testVal]) { @@ -17,17 +17,14 @@ async function test([filename, fileContent = ''], [type, testVal]) { }[type]; const result = testContent === testVal; i = `${+i + 1}`.padStart(2, '0'); - if (result) { - console.info(`- #${i} passed: '${filename}' is ${testVal}`); - } - else { + if (!result) { errors++; console.error(`! #${i} failed: '${filename}' is ${testContent} instead of ${testVal}`); } } async function unitTest() { - console.info('-'.repeat(10) + '\nUnit tests\n' + '-'.repeat(10)); + console.info('-'.repeat(10) + ' Unit tests ' + '-'.repeat(10)); desc('metadata'); await test(['file_size', '0123456789'], ['size', 10]); await test(['empty', ''], ['size', 0]); From 31915e2128371d2e76905a3ba82fd5c46c54cc27 Mon Sep 17 00:00:00 2001 From: Nixinova Date: Tue, 9 Sep 2025 22:18:47 +1200 Subject: [PATCH 15/15] Extract some standalone data --- src/index.ts | 59 +++++-------------- src/program/classes/attributes.ts | 46 +++++++++++++++ .../data/{loadData.ts => loadDataFiles.ts} | 2 +- src/program/data/retrieveData.ts | 41 +++++++++++++ 4 files changed, 104 insertions(+), 44 deletions(-) create mode 100644 src/program/classes/attributes.ts rename src/program/data/{loadData.ts => loadDataFiles.ts} (93%) create mode 100644 src/program/data/retrieveData.ts diff --git a/src/index.ts b/src/index.ts index 20678ff..22a6c9c 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,16 +1,15 @@ import commonPrefix from 'common-path-prefix'; import ignore, { Ignore } from 
'ignore'; import { isBinaryFile } from 'isbinaryfile'; -import YAML from 'js-yaml'; import FS from 'node:fs'; import Path from 'node:path'; -import loadFile, { parseGeneratedDataFile } from './program/data/loadData.js'; +import Attributes from './program/classes/attributes.js'; +import retrieveData from './program/data/retrieveData.js'; import { normPath } from './program/fs/normalisedPath.js'; import readFileChunk from './program/fs/readFile.js'; import walkTree from './program/fs/walkTree.js'; -import parseGitattributes, { FlagAttributes } from './program/parsing/parseGitattributes.js'; +import parseGitattributes from './program/parsing/parseGitattributes.js'; import pcre from './program/utils/pcre.js'; -import * as S from './types/schema.js'; import * as T from './types/types.js'; const binaryData = JSON.parse( @@ -45,12 +44,7 @@ async function analyse(rawInput?: string | string[] | Record, op }; // Load data from github-linguist web repo - const langData = await loadFile('languages.yml', opts.offline).then(YAML.load); - const vendorData = await loadFile('vendor.yml', opts.offline).then(YAML.load); - const docData = await loadFile('documentation.yml', opts.offline).then(YAML.load); - const heuristicsData = await loadFile('heuristics.yml', opts.offline).then(YAML.load); - const generatedData = await loadFile('generated.rb', opts.offline).then(parseGeneratedDataFile); - const vendorPaths = [...vendorData, ...docData, ...generatedData]; + const { langData, heuristicsData, vendorPaths } = await retrieveData(opts.offline ?? 
false); // Setup main variables const fileAssociations: Record = {}; @@ -93,28 +87,7 @@ async function analyse(rawInput?: string | string[] | Record, op } // Fetch and normalise gitattributes data of all subfolders and save to metadata - const manualAttributes: Record = {}; // Maps file globs to gitattribute boolean flags - const getFlaggedGlobs = (attr: keyof FlagAttributes, val: boolean) => { - return Object.entries(manualAttributes) - .filter(([, attrs]) => attrs[attr] === val) - .map(([glob]) => glob); - }; - const findAttrsForPath = (filePath: string): FlagAttributes | null => { - const resultAttrs: Record = {}; - for (const glob in manualAttributes) { - if (ignore().add(glob).ignores(relPath(filePath))) { - const matchingAttrs = manualAttributes[glob]; - for (const [attr, val] of Object.entries(matchingAttrs)) { - if (val !== null) resultAttrs[attr] = val; - } - } - } - - if (!JSON.stringify(resultAttrs)) { - return null; - } - return resultAttrs as FlagAttributes; - }; + const manualAttributes = new Attributes(); if (!useRawContent && opts.checkAttributes) { const nestedAttrFiles = files.filter((file) => file.endsWith('.gitattributes')); for (const attrFile of nestedAttrFiles) { @@ -123,7 +96,7 @@ async function analyse(rawInput?: string | string[] | Record, op const contents = await readFileChunk(attrFile); const parsed = parseGitattributes(contents, relAttrFolder); for (const { glob, attrs } of parsed) { - manualAttributes[glob] = attrs; + manualAttributes.add(glob, attrs); } } } @@ -139,7 +112,7 @@ async function analyse(rawInput?: string | string[] | Record, op continue; } - const fileAttrs = findAttrsForPath(file); + const fileAttrs = manualAttributes.findAttrsForPath(relPath(file)); if (fileAttrs?.generated === false || fileAttrs?.vendored === false) { // File is explicitly marked as *not* to be ignored // do nothing @@ -153,14 +126,14 @@ async function analyse(rawInput?: string | string[] | Record, op if (!opts.keepVendored) { // Get data of files 
that have been manually marked with metadata const vendorTrueGlobs = [ - ...getFlaggedGlobs('vendored', true), - ...getFlaggedGlobs('generated', true), - ...getFlaggedGlobs('documentation', true), + ...manualAttributes.getFlaggedGlobs('vendored', true), + ...manualAttributes.getFlaggedGlobs('generated', true), + ...manualAttributes.getFlaggedGlobs('documentation', true), ]; const vendorFalseGlobs = [ - ...getFlaggedGlobs('vendored', false), - ...getFlaggedGlobs('generated', false), - ...getFlaggedGlobs('documentation', false), + ...manualAttributes.getFlaggedGlobs('vendored', false), + ...manualAttributes.getFlaggedGlobs('generated', false), + ...manualAttributes.getFlaggedGlobs('documentation', false), ]; // Set up glob ignore object to use for expanding globs to match files const vendorTrueIgnore = ignore().add(vendorTrueGlobs); @@ -180,10 +153,10 @@ async function analyse(rawInput?: string | string[] | Record, op // Filter out files that are binary by default files = files.filter((file) => !binaryData.some((ext) => file.endsWith('.' 
+ ext))); // Filter out manually specified binary files - const binaryIgnored = ignore().add(getFlaggedGlobs('binary', true)); + const binaryIgnored = ignore().add(manualAttributes.getFlaggedGlobs('binary', true)); files = filterOutIgnored(files, binaryIgnored); // Re-add files manually marked not as binary - const binaryUnignored = ignore().add(getFlaggedGlobs('binary', false)); + const binaryUnignored = ignore().add(manualAttributes.getFlaggedGlobs('binary', false)); const unignoredList = filterOutIgnored(files, binaryUnignored); files.push(...unignoredList); } @@ -394,7 +367,7 @@ async function analyse(rawInput?: string | string[] | Record, op if (!hiddenCategories.some((cat) => lang && langData[lang]?.type === cat)) continue; // Skip if language is forced as detectable if (opts.checkDetected) { - const detectable = ignore().add(getFlaggedGlobs('detectable', true)); + const detectable = ignore().add(manualAttributes.getFlaggedGlobs('detectable', true)); if (detectable.ignores(relPath(file))) continue; } // Delete result otherwise diff --git a/src/program/classes/attributes.ts b/src/program/classes/attributes.ts new file mode 100644 index 0000000..5fd7091 --- /dev/null +++ b/src/program/classes/attributes.ts @@ -0,0 +1,46 @@ +import ignore from 'ignore'; +import { FileGlob, RelFile } from '../../types/types.js'; +import { FlagAttributes } from '../parsing/parseGitattributes.js'; + +/** Stores parsed attribute information per file glob */ +export default class Attributes { + #attributes: Record; + + constructor() { + this.#attributes = {}; + } + + get attributes() { + return this.#attributes; + } + + add(glob: FileGlob, attributes: FlagAttributes) { + this.#attributes[glob] = attributes; + } + + getFlaggedGlobs(attr: keyof FlagAttributes, val: boolean) { + return Object.entries(this.#attributes) + .filter(([, attrs]) => attrs[attr] === val) + .map(([glob]) => glob); + } + + findAttrsForPath(relFilePath: RelFile): FlagAttributes | null { + const resultAttrs: Record 
= {}; + for (const glob in this.#attributes) { + const matchingAttrs = this.#attributes[glob]; + // Check if glob matches rel path + if (ignore().add(glob).ignores(relFilePath)) { + for (const [attr, val] of Object.entries(matchingAttrs)) { + if (val !== null) { + resultAttrs[attr] = val; + } + } + } + } + + if (!JSON.stringify(resultAttrs).length) { + return null; + } + return resultAttrs as FlagAttributes; + } +} diff --git a/src/program/data/loadData.ts b/src/program/data/loadDataFiles.ts similarity index 93% rename from src/program/data/loadData.ts rename to src/program/data/loadDataFiles.ts index 1f3f4f0..5568d0e 100644 --- a/src/program/data/loadData.ts +++ b/src/program/data/loadDataFiles.ts @@ -31,6 +31,6 @@ export function parseGeneratedDataFile(fileContent: string): string[] { } /** Load a data file from github-linguist. */ -export default function loadFile(file: string, offline: boolean = false): Promise { +export function loadFile(file: string, offline: boolean = false): Promise { return offline ? 
loadLocalFile(file) : loadWebFile(file); } diff --git a/src/program/data/retrieveData.ts b/src/program/data/retrieveData.ts new file mode 100644 index 0000000..cc91906 --- /dev/null +++ b/src/program/data/retrieveData.ts @@ -0,0 +1,41 @@ +import YAML from 'js-yaml'; +import { HeuristicsSchema, LanguagesScema, VendorSchema } from '../../types/schema.js'; +import { loadFile, parseGeneratedDataFile } from './loadDataFiles.js'; + +type LoadedData = { + langData: LanguagesScema; + vendorData: VendorSchema; + docData: VendorSchema; + heuristicsData: HeuristicsSchema; + generatedData: string[]; + vendorPaths: string[]; +}; + +let data: LoadedData = null!; + +async function initRetrieveData(offline: boolean): Promise { + // Only load the data once + if (data) return; + + const langData = (await loadFile('languages.yml', offline).then(YAML.load)) as LanguagesScema; + const vendorData = (await loadFile('vendor.yml', offline).then(YAML.load)) as VendorSchema; + const docData = (await loadFile('documentation.yml', offline).then(YAML.load)) as VendorSchema; + const heuristicsData = (await loadFile('heuristics.yml', offline).then(YAML.load)) as HeuristicsSchema; + const generatedData = (await loadFile('generated.rb', offline).then(parseGeneratedDataFile)) as string[]; + const vendorPaths = [...vendorData, ...docData, ...generatedData]; + + data = { + langData, + vendorData, + docData, + heuristicsData, + generatedData, + vendorPaths, + }; +} + +/** Load data from github-linguist web repo or cached local file. */ +export default async function retrieveData(offline: boolean): Promise { + await initRetrieveData(offline); + return data; +}