diff --git a/bin/index.js b/bin/index.js index 2470305..aa91d40 100755 --- a/bin/index.js +++ b/bin/index.js @@ -71,6 +71,12 @@ yarg.command({ demandOption: true, type: 'string', }, + url: { + alias: 'u', + describe: 'URL to start crawling', + demandOption: false, + type: 'string', + }, output: { alias: 'o', describe: 'Output file for the list of URLs', @@ -83,6 +89,12 @@ yarg.command({ fs.accessSync(argv.config, fs.constants.F_OK); console.log(chalk.green(`[INFO] Config file "${argv.config}" exists.`)); const config = JSON.parse(fs.readFileSync(argv.config, 'utf-8')); + if (argv.url) { + const urlObj = new URL(argv.url); + const domainRegex = `https?://${urlObj.hostname}(?:/.*|)`; + config.crawler.entryPoint = argv.url; + config.crawler.includeRegexes = [domainRegex]; + } const {error, value} = configModel.validate(config); if (error) { console.error(chalk.red(`[ERROR] ${error.message}`)); diff --git a/config.json b/config.json new file mode 100644 index 0000000..29a33ce --- /dev/null +++ b/config.json @@ -0,0 +1,43 @@ +{ + "browser": { + "headless": false, + "maximize": true, + "proxy": { + "enabled": false, + "host": "127.0.0.1", + "port": 8080 + }, + "instances": 4 + }, + "crawler": { + "entryPoint": "https://security-crawl-maze.app/", + "eventTimeout": 10000, + "navigationTimeout": 30000, + "eventWait": 0, + "maxDuration": 0, + "elements": [ + "a", + "button", + "input[type=\"submit\"]" + ], + "maxChildren": 0, + "maxDepth": 10, + "authentication": { + "basicAuth": { + "enabled": false, + "username": "username", + "password": "password" + }, + "recorderAuth": { + "enabled": false, + "pptrRecording": "/path/to/login/recording" + } + }, + "includeRegexes": [ + "https?://security-crawl-maze.app(?:/.*|)" + ], + "excludeRegexes": [ + ".*logout.*" + ] + } +} \ No newline at end of file diff --git a/package.json b/package.json index 4744344..923d771 100644 --- a/package.json +++ b/package.json @@ -2,7 +2,27 @@ "name": "sasori-crawl", "version": 
"1.0.0", "description": "Sasori is a dynamic web crawler powered by Puppeteer, designed for lightning-fast endpoint discovery.", - "main": "bin/index.js", + "keywords": [ + "crawler", + "crawling", + "scraping", + "endpoint-discovery", + "puppeteer", + "dynamic", + "automation", + "security", + "dast", + "infosec" + ], + "homepage": "https://github.com/karthikuj/sasori#readme", + "bugs": { + "url": "https://github.com/karthikuj/sasori/issues" + }, + "repository": { + "type": "git", + "url": "git+https://github.com/karthikuj/sasori.git" + }, + "license": "MIT", "author": "Karthik UJ", "contributors": [ { @@ -11,11 +31,8 @@ "url": "https://www.5up3r541y4n.tech/" } ], - "license": "MIT", - "repository": { - "type": "git", - "url": "git+https://github.com/karthikuj/sasori.git" - }, + "type": "commonjs", + "main": "bin/index.js", "bin": { "sasori": "bin/index.js" }, @@ -23,18 +40,6 @@ "start": "node .", "lint": "npx eslint . --fix" }, - "keywords": [ - "crawler", - "crawling", - "scraping", - "endpoint-discovery", - "puppeteer", - "dynamic", - "automation", - "security", - "dast", - "infosec" - ], "dependencies": { "@puppeteer/replay": "^2.13.4", "chalk": "^4",