-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/server/index.ts b/server/index.ts
index db91e89..fb71778 100644
--- a/server/index.ts
+++ b/server/index.ts
@@ -1,3 +1,4 @@
+import crypto from 'crypto';
import express, { type Request, Response, NextFunction, Router, type Express } from "express";
import { createServer } from "http";
import githubRoutes from "./routes/github.js";
@@ -5,6 +6,7 @@ import deployRoutes from "./routes/deploy.js";
import userRoutes from "./routes/user.js";
import healthRoutes from "./routes/health.js";
import sourcesRoutes from "./routes/sources.js";
+import { redactSensitiveData } from "./lib/security.js";
// Validate required environment variables at startup
function validateEnvironment(): void {
@@ -17,11 +19,9 @@ function validateEnvironment(): void {
const missing = required.filter(key => !process.env[key]);
if (missing.length > 0) {
- console.error('❌ Missing required environment variables:');
- missing.forEach(key => console.error(` - ${key}`));
- console.error('\nPlease check your .env file or environment configuration.');
- console.error('See .env.example for reference.\n');
- process.exit(1);
+ console.warn('⚠️ Missing environment variables (some features will be unavailable):');
+ missing.forEach(key => console.warn(` - ${key}`));
+ console.warn(' See .env.example for reference.\n');
}
// Validate optional but recommended variables
@@ -48,7 +48,7 @@ app.use(express.urlencoded({ extended: false }));
app.use((req, res, next) => {
// Use existing request ID from header or generate new one
const requestId = req.headers['x-request-id'] as string ||
- `req_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`;
+ `req_${Date.now()}_${crypto.randomUUID()}`;
// Store on request and response
(req as any).id = requestId;
@@ -99,15 +99,22 @@ app.use((req, res, next) => {
// Check if request origin is allowed
if (requestOrigin && allowedOrigins.includes(requestOrigin)) {
res.setHeader('Access-Control-Allow-Origin', requestOrigin);
+ res.setHeader('Access-Control-Allow-Credentials', 'true');
} else if (process.env.NODE_ENV === 'development') {
- // In development, allow any origin for flexibility
- res.setHeader('Access-Control-Allow-Origin', '*');
+ // In development, echo back the request's Origin header when present (otherwise allow any origin).
+ // A specific origin is required because a wildcard cannot be combined with Access-Control-Allow-Credentials.
+ if (requestOrigin) {
+ res.setHeader('Access-Control-Allow-Origin', requestOrigin);
+ res.setHeader('Access-Control-Allow-Credentials', 'true');
+ } else {
+ res.setHeader('Access-Control-Allow-Origin', '*');
+ }
}
// In production, if origin not allowed, don't set the header (request will be rejected)
res.setHeader('Access-Control-Allow-Methods', 'GET, POST, PUT, DELETE, PATCH, OPTIONS');
res.setHeader('Access-Control-Allow-Headers', 'Content-Type, Authorization');
- res.setHeader('Access-Control-Allow-Credentials', 'true');
+ res.setHeader('Vary', 'Origin');
if (req.method === 'OPTIONS') {
return res.status(200).end();
@@ -146,7 +153,8 @@ app.use((req, res, next) => {
if (path.startsWith("/api")) {
let logLine = `[${requestId}] ${req.method} ${path} ${res.statusCode} in ${duration}ms`;
if (capturedJsonResponse) {
- logLine += ` :: ${JSON.stringify(capturedJsonResponse)}`;
+ const redactedResponse = redactSensitiveData(capturedJsonResponse);
+ logLine += ` :: ${JSON.stringify(redactedResponse)}`;
}
if (logLine.length > 80) {
@@ -179,7 +187,7 @@ app.use((req, res, next) => {
app.use((err: any, req: Request, res: Response, _next: NextFunction) => {
const status = err.status || err.statusCode || 500;
const message = err.message || "Internal Server Error";
- const requestId = req.headers['x-request-id'] || 'unknown';
+ const requestId = (req as any).id || res.getHeader('X-Request-ID') || req.headers['x-request-id'] || 'unknown';
// Log error with full context for debugging and monitoring
// In production, consider integrating with error tracking services (e.g., Sentry)
diff --git a/server/lib/bitbucket.ts b/server/lib/bitbucket.ts
index c309acb..991d242 100644
--- a/server/lib/bitbucket.ts
+++ b/server/lib/bitbucket.ts
@@ -259,9 +259,10 @@ export async function getBitbucketRepositoriesWithTitles(
await new Promise(resolve => setTimeout(resolve, 200));
}
- // Extract workspace from URL or use owner login
- const workspace = repo.owner.login;
- const repoSlug = repo.name.toLowerCase().replace(/\s+/g, '-');
+ // Extract workspace and slug from the repo URL (https://bitbucket.org/{workspace}/{slug})
+ const urlParts = new URL(repo.url).pathname.split('/').filter(Boolean);
+ const workspace = urlParts[0] || repo.owner.login;
+ const repoSlug = urlParts[1] || repo.name.toLowerCase().replace(/\s+/g, '-');
const readme = await getBitbucketReadme(workspace, repoSlug, username, appPassword);
const displayName = extractTitleFromReadme(readme);
diff --git a/server/lib/github.ts b/server/lib/github.ts
index 35b4f91..960ecf3 100644
--- a/server/lib/github.ts
+++ b/server/lib/github.ts
@@ -300,20 +300,21 @@ export async function getRepositories(
orgs = [];
}
- // Get repositories for each organization with individual error handling
- const orgRepos: Repository[] = [];
- for (const org of orgs) {
+ // Get repositories for each organization concurrently with individual error handling
+ const orgReposPromises = orgs.map(async (org) => {
try {
- const repos = await getOrganizationRepositories(octokit, {
+ return await getOrganizationRepositories(octokit, {
login: org.login,
avatarUrl: org.avatarUrl,
});
- orgRepos.push(...repos);
} catch (error) {
console.warn(`Failed to fetch repositories for organization ${org.login}, skipping:`, error);
- // Continue with other organizations
+ return [];
}
- }
+ });
+
+ const orgReposResults = await Promise.all(orgReposPromises);
+ const orgRepos = orgReposResults.flat();
// Combine user and organization repositories, but deduplicate based on repo ID
const allRepos = [...userRepos, ...orgRepos];
@@ -347,8 +348,8 @@ export async function getRepositories(
console.log(`Processing README batch ${batchNumber}/${totalBatches} (${batch.length} repos)`);
- const results = await Promise.allSettled(
- batch.map(async (repo) => {
+ const results = await Promise.all(
+ batch.map(async (repo): Promise<{ repo: string; success: boolean }> => {
try {
const readme = await getReadmeContent(
octokit,
@@ -361,6 +362,7 @@ export async function getRepositories(
if (displayName) {
repo.displayName = displayName;
}
+ return { repo: `${repo.owner.login}/${repo.name}`, success: true };
} catch (error) {
console.warn(
`Failed to process README for ${repo.owner.login}/${repo.name}:`,
@@ -368,12 +370,13 @@ export async function getRepositories(
);
// Continue with other repositories, don't set displayName
repo.displayName = null;
+ return { repo: `${repo.owner.login}/${repo.name}`, success: false };
}
}),
);
// Log batch results for monitoring
- const failures = results.filter(r => r.status === 'rejected');
+ const failures = results.filter(r => !r.success);
if (failures.length > 0) {
console.warn(`Batch ${batchNumber}: ${failures.length}/${batch.length} failures`);
}
@@ -445,10 +448,9 @@ export async function getReadmeContent(
owner: string,
repo: string,
): Promise
{
- const octokit =
- typeof clientOrToken === "string"
- ? new Octokit({ auth: clientOrToken })
- : clientOrToken;
+ const octokit = typeof clientOrToken === 'string'
+ ? new Octokit({ auth: clientOrToken })
+ : clientOrToken;
try {
const { data } = await octokit.repos.getReadme({
diff --git a/server/lib/gitlab.ts b/server/lib/gitlab.ts
index 5bb66f8..f07c045 100644
--- a/server/lib/gitlab.ts
+++ b/server/lib/gitlab.ts
@@ -64,8 +64,12 @@ export async function getGitLabUser(accessToken: string): Promise {
*/
export async function getGitLabProjects(
accessToken: string,
- username?: string
+ username: string
): Promise {
+ if (!username) {
+ throw new Error('GitLab username is required');
+ }
+
try {
let projects: GitLabProject[] = [];
let page = 1;
@@ -117,27 +121,30 @@ export async function getGitLabReadme(
): Promise {
try {
const readmeFiles = ['README.md', 'readme.md', 'Readme.md', 'README', 'readme'];
+ const branches = ['main', 'master'];
- for (const filename of readmeFiles) {
- try {
- const response = await axios.get(
- `${GITLAB_API_URL}/projects/${projectId}/repository/files/${encodeURIComponent(filename)}/raw`,
- {
- headers: {
- 'Authorization': `Bearer ${accessToken}`
- },
- params: {
- ref: 'main'
+ for (const ref of branches) {
+ for (const filename of readmeFiles) {
+ try {
+ const response = await axios.get(
+ `${GITLAB_API_URL}/projects/${projectId}/repository/files/${encodeURIComponent(filename)}/raw`,
+ {
+ headers: {
+ 'Authorization': `Bearer ${accessToken}`
+ },
+ params: {
+ ref
+ }
}
- }
- );
+ );
- if (response.data) {
- return response.data;
+ if (response.data) {
+ return response.data;
+ }
+ } catch (err) {
+ // Try next filename / branch
+ continue;
}
- } catch (err) {
- // Try next filename
- continue;
}
}
@@ -156,10 +163,21 @@ export function extractTitleFromReadme(readme: string | null): string | null {
if (!readme) return null;
const lines = readme.split('\n');
- for (const line of lines) {
- const match = line.match(/^#\s+(.+)/);
- if (match) {
- return match[1].trim();
+ for (let i = 0; i < lines.length; i++) {
+ const line = lines[i];
+
+ // Check for ATX-style H1 (# Title)
+ const atxMatch = line.match(/^\s*#(?![#])\s*(.+)/);
+ if (atxMatch) {
+ return atxMatch[1].trim();
+ }
+
+ // Check for Setext-style H1 (Title\n=====)
+ if (i < lines.length - 1) {
+ const nextLine = lines[i + 1];
+ if (nextLine.trim().match(/^={3,}$/) && line.trim() !== '') {
+ return line.trim();
+ }
}
}
@@ -213,7 +231,7 @@ function convertGitLabProjectToRepository(project: GitLabProject): GitLabReposit
*/
export async function getGitLabProjectsWithTitles(
accessToken: string,
- username?: string
+ username: string
): Promise {
const projects = await getGitLabProjects(accessToken, username);
diff --git a/server/lib/openai.ts b/server/lib/openai.ts
index 891cd64..9d405ed 100644
--- a/server/lib/openai.ts
+++ b/server/lib/openai.ts
@@ -174,27 +174,27 @@ async function generateRepoSummary(
): Promise {
try {
// Build enhanced context with metadata
- let userContent = `Repository Name: ${name}`;
+ const userContentParts: string[] = [`Repository Name: ${name}`];
if (description) {
- userContent += `\nDescription: ${description}`;
+ userContentParts.push(`Description: ${description}`);
}
if (metadata) {
if (metadata.language) {
- userContent += `\nPrimary Language: ${metadata.language}`;
+ userContentParts.push(`Primary Language: ${metadata.language}`);
}
if (metadata.topics && metadata.topics.length > 0) {
- userContent += `\nTopics/Tags: ${metadata.topics.join(', ')}`;
+ userContentParts.push(`Topics/Tags: ${metadata.topics.join(', ')}`);
}
if (metadata.stars > 0) {
- userContent += `\nStars: ${metadata.stars}`;
+ userContentParts.push(`Stars: ${metadata.stars}`);
}
if (metadata.url) {
- userContent += `\nProject URL: ${metadata.url}`;
+ userContentParts.push(`Project URL: ${metadata.url}`);
}
}
@@ -204,53 +204,55 @@ async function generateRepoSummary(
const cleanedReadme = cleanReadmeContent(readme);
const trimmedReadme = intelligentTruncate(cleanedReadme, LLM_CONFIG.README_MAX_LENGTH);
- userContent += `\nREADME:\n${trimmedReadme}`;
+ userContentParts.push(`README:\n${trimmedReadme}`);
} else if (accessToken && owner) {
try {
// Analyze project structure when README is not available
const projectStructure = await analyzeProjectStructure(accessToken, owner, name);
const structureSummary = generateProjectSummary(projectStructure);
- userContent += `\nProject Structure Analysis:\n${structureSummary}`;
+ userContentParts.push(`Project Structure Analysis:\n${structureSummary}`);
// Add detailed structure information
if (projectStructure.frameworkIndicators.length > 0) {
- userContent += `\nDetected Frameworks: ${projectStructure.frameworkIndicators.map(f => f.framework).join(', ')}`;
+ userContentParts.push(`Detected Frameworks: ${projectStructure.frameworkIndicators.map(f => f.framework).join(', ')}`);
}
if (projectStructure.techStack.length > 0) {
- userContent += `\nTech Stack: ${projectStructure.techStack.join(', ')}`;
+ userContentParts.push(`Tech Stack: ${projectStructure.techStack.join(', ')}`);
}
if (projectStructure.sourceFiles.length > 0) {
const entryPoints = projectStructure.sourceFiles.filter(f => f.isEntryPoint);
if (entryPoints.length > 0) {
- userContent += `\nEntry Points: ${entryPoints.map(f => f.name).join(', ')}`;
+ userContentParts.push(`Entry Points: ${entryPoints.map(f => f.name).join(', ')}`);
}
const languages = Array.from(new Set(projectStructure.sourceFiles.map(f => f.language)));
- userContent += `\nLanguages Used: ${languages.join(', ')}`;
+ userContentParts.push(`Languages Used: ${languages.join(', ')}`);
}
if (projectStructure.packageFiles.length > 0) {
const packageInfo = projectStructure.packageFiles[0];
if (packageInfo.description) {
- userContent += `\nPackage Description: ${packageInfo.description}`;
+ userContentParts.push(`Package Description: ${packageInfo.description}`);
}
if (packageInfo.dependencies && packageInfo.dependencies.length > 0) {
const majorDeps = packageInfo.dependencies.slice(0, 10); // Limit to avoid token overflow
- userContent += `\nKey Dependencies: ${majorDeps.join(', ')}`;
+ userContentParts.push(`Key Dependencies: ${majorDeps.join(', ')}`);
}
}
} catch (error) {
console.warn("Failed to analyze project structure:", error);
- userContent += `\nNote: No README available and project structure analysis failed. Analysis based on repository metadata only.`;
+ userContentParts.push(`Note: No README available and project structure analysis failed. Analysis based on repository metadata only.`);
}
} else {
- userContent += `\nNote: No README available. Analysis based on repository metadata only.`;
+ userContentParts.push(`Note: No README available. Analysis based on repository metadata only.`);
}
+ const userContent = userContentParts.join('\n');
+
const prompt = `${customPrompt || DEFAULT_PROMPT}
Additional context: This summary will be displayed in a developer portfolio to showcase technical skills and project impact. Focus on:
diff --git a/server/lib/portfolio-generator.ts b/server/lib/portfolio-generator.ts
index 9912b99..6b80fb3 100644
--- a/server/lib/portfolio-generator.ts
+++ b/server/lib/portfolio-generator.ts
@@ -14,6 +14,16 @@ export function capitalizeFirstLetter(str: string): string {
return str.charAt(0).toUpperCase() + str.slice(1);
}
+/**
+ * Returns a URL that is safe to place in an HTML attribute such as `href`.
+ *
+ * Empty input and URLs whose scheme is `javascript:`, `data:` or `vbscript:`
+ * are replaced with `'#'`; any other value is trimmed and passed through
+ * `escapeHtml`.
+ *
+ * @param url - Candidate URL; may be null or undefined.
+ * @returns The escaped URL, or `'#'` when the input is empty or uses a blocked scheme.
+ */
+export function sanitizeUrl(url: string | null | undefined): string {
+  if (!url) return '#';
+  const trimmed = url.trim();
+  // URL parsers drop leading C0 control characters/spaces and remove ASCII
+  // tab/newline characters anywhere in the input, so "java\tscript:..." or
+  // "\x01javascript:..." still resolves to a javascript: URL. Test the scheme
+  // against a copy normalized the same way; the returned value is unchanged.
+  const schemeProbe = trimmed
+    .replace(/^[\u0000-\u0020]+/, '')
+    .replace(/[\t\n\r]/g, '');
+  // Check for dangerous protocols
+  if (/^(javascript:|data:|vbscript:)/i.test(schemeProbe)) {
+    return '#';
+  }
+  return escapeHtml(trimmed);
+}
+
export function generatePortfolioHtml(
username: string,
repositories: Repository[],
@@ -140,11 +150,11 @@ export function generatePortfolioHtml(
★ ${repo.metadata.stars}
` : ''}
-
+
${repo.metadata?.url ?
- `
+ `
`
: ''}
diff --git a/server/lib/project-analyzer.ts b/server/lib/project-analyzer.ts
index bcc8b7e..ab23e09 100644
--- a/server/lib/project-analyzer.ts
+++ b/server/lib/project-analyzer.ts
@@ -129,11 +129,18 @@ const FRAMEWORK_PATTERNS: Record = {
}
};
-const ENTRY_POINT_PATTERNS = [
- 'main.js', 'main.ts', 'index.js', 'index.ts', 'app.js', 'app.ts',
- 'server.js', 'server.ts', 'main.py', 'app.py', 'main.go',
- 'main.rs', 'lib.rs', 'Main.java', 'Application.java'
-];
+const ENTRY_POINT_REGEX = /main\.js|main\.ts|index\.js|index\.ts|app\.js|app\.ts|server\.js|server\.ts|main\.py|app\.py|main\.go|main\.rs|lib\.rs|main\.java|application\.java/i;
+
+const CONFIG_REGEX = /webpack\.config|vite\.config|rollup\.config|babel\.config|eslint|prettier|tsconfig|jest\.config|cypress\.config|docker|nginx\.conf|apache\.conf|\.env|config\.yml|config\.yaml/;
+
+const PACKAGE_FILES = new Set([
+ 'package.json', 'requirements.txt', 'cargo.toml', 'go.mod', 'go.sum',
+ 'pom.xml', 'build.gradle', 'gemfile', 'composer.json', 'pubspec.yaml'
+]);
+
+const SOURCE_EXT_REGEX = /\.(?:js|ts|jsx|tsx|py|java|go|rs|rb|php|cpp|c|cs|swift|kt|dart|vue|svelte)$/;
+
+const MAJOR_TECH_REGEX = /react|vue|angular|express|django|flask|spring|rails|laravel|mongodb|postgresql|mysql|redis|docker|kubernetes|aws|azure|gcp|firebase|graphql|apollo|prisma|typeorm|sequelize/i;
export async function analyzeProjectStructure(
accessToken: string,
@@ -354,9 +361,23 @@ function analyzeGoMod(content: string): PackageInfo {
};
}
-function detectFrameworks(structure: ProjectStructure): FrameworkInfo[] {
+export function detectFrameworks(structure: ProjectStructure): FrameworkInfo[] {
const frameworks: FrameworkInfo[] = [];
+ const rootFilesSet = new Set(structure.rootFiles);
+ const directoriesSet = new Set(structure.directories);
+
+ // Merge and de-duplicate the dependencies declared across all package files
+ const allDependencies = new Set();
+ for (const pkg of structure.packageFiles) {
+ if (pkg.dependencies) {
+ for (const dep of pkg.dependencies) {
+ allDependencies.add(dep);
+ }
+ }
+ }
+ const uniqueDependencies = Array.from(allDependencies);
+
for (const [frameworkName, patterns] of Object.entries(FRAMEWORK_PATTERNS)) {
let confidence = 0;
const indicators: string[] = [];
@@ -364,7 +385,7 @@ function detectFrameworks(structure: ProjectStructure): FrameworkInfo[] {
// Check for required files
if (patterns.files) {
for (const file of patterns.files) {
- if (structure.rootFiles.includes(file)) {
+ if (rootFilesSet.has(file)) {
confidence += 30;
indicators.push(`Has ${file}`);
}
@@ -373,14 +394,12 @@ function detectFrameworks(structure: ProjectStructure): FrameworkInfo[] {
// Check for dependencies
if (patterns.dependencies) {
- for (const packageFile of structure.packageFiles) {
- if (packageFile.dependencies) {
- for (const dep of patterns.dependencies) {
- if (packageFile.dependencies.some(d => d.includes(dep))) {
- confidence += 40;
- indicators.push(`Uses ${dep}`);
- }
- }
+ for (const dep of patterns.dependencies) {
+ // Substring match (as before): does any declared dependency contain `dep`?
+ // Note: each pattern now scores once overall, not once per matching package file.
+ if (uniqueDependencies.some(d => d.includes(dep))) {
+ confidence += 40;
+ indicators.push(`Uses ${dep}`);
}
}
}
@@ -388,7 +407,7 @@ function detectFrameworks(structure: ProjectStructure): FrameworkInfo[] {
// Check for config files
if (patterns.configs) {
for (const config of patterns.configs) {
- if (structure.rootFiles.includes(config)) {
+ if (rootFilesSet.has(config)) {
confidence += 20;
indicators.push(`Has ${config}`);
}
@@ -401,13 +420,13 @@ function detectFrameworks(structure: ProjectStructure): FrameworkInfo[] {
if (pattern.endsWith('/')) {
// Directory pattern
const dirName = pattern.slice(0, -1);
- if (structure.directories.includes(dirName)) {
+ if (directoriesSet.has(dirName)) {
confidence += 15;
indicators.push(`Has ${dirName}/ directory`);
}
} else {
// File pattern
- if (structure.rootFiles.includes(pattern)) {
+ if (rootFilesSet.has(pattern)) {
confidence += 25;
indicators.push(`Has ${pattern}`);
}
@@ -470,28 +489,15 @@ function extractTechStack(structure: ProjectStructure): string[] {
}
function isPackageFile(fileName: string): boolean {
- const packageFiles = [
- 'package.json', 'requirements.txt', 'cargo.toml', 'go.mod', 'go.sum',
- 'pom.xml', 'build.gradle', 'gemfile', 'composer.json', 'pubspec.yaml'
- ];
- return packageFiles.includes(fileName);
+ return PACKAGE_FILES.has(fileName);
}
function isConfigFile(fileName: string): boolean {
- const configPatterns = [
- 'webpack.config', 'vite.config', 'rollup.config', 'babel.config',
- 'eslint', 'prettier', 'tsconfig', 'jest.config', 'cypress.config',
- 'docker', 'nginx.conf', 'apache.conf', '.env', 'config.yml', 'config.yaml'
- ];
- return configPatterns.some(pattern => fileName.includes(pattern));
+ return CONFIG_REGEX.test(fileName);
}
function isSourceFile(fileName: string): boolean {
- const sourceExtensions = [
- '.js', '.ts', '.jsx', '.tsx', '.py', '.java', '.go', '.rs', '.rb',
- '.php', '.cpp', '.c', '.cs', '.swift', '.kt', '.dart', '.vue', '.svelte'
- ];
- return sourceExtensions.some(ext => fileName.endsWith(ext));
+ return SOURCE_EXT_REGEX.test(fileName);
}
function getConfigType(fileName: string): 'config' | 'build' | 'deployment' | 'testing' | 'linting' {
@@ -548,9 +554,7 @@ function getLanguageFromExtension(extension: string): string {
}
function isEntryPoint(fileName: string): boolean {
- return ENTRY_POINT_PATTERNS.some(pattern =>
- fileName.toLowerCase().includes(pattern.toLowerCase())
- );
+ return ENTRY_POINT_REGEX.test(fileName);
}
function getMostCommonLanguage(languages: string[]): string | null {
@@ -566,13 +570,7 @@ function getMostCommonLanguage(languages: string[]): string | null {
}
function isMajorTechnology(dependency: string): boolean {
- const majorTechs = [
- 'react', 'vue', 'angular', 'express', 'django', 'flask', 'spring',
- 'rails', 'laravel', 'mongodb', 'postgresql', 'mysql', 'redis',
- 'docker', 'kubernetes', 'aws', 'azure', 'gcp', 'firebase',
- 'graphql', 'apollo', 'prisma', 'typeorm', 'sequelize'
- ];
- return majorTechs.some(tech => dependency.toLowerCase().includes(tech));
+ return MAJOR_TECH_REGEX.test(dependency);
}
export function generateProjectSummary(structure: ProjectStructure): string {
diff --git a/server/lib/readme-cleaner.ts b/server/lib/readme-cleaner.ts
index 5455f70..15830c6 100644
--- a/server/lib/readme-cleaner.ts
+++ b/server/lib/readme-cleaner.ts
@@ -2,64 +2,123 @@
* Utility functions for cleaning README content before LLM processing
*/
-/**
- * Removes common badges and shields from README content
- * @param readme - Raw README content
- * @returns Cleaned README content with badges removed
- */
-export function removeBadges(readme: string): string {
- if (!readme) return readme;
+// ⚡ Bolt Optimization: Extracted RegExps to module level to avoid redundant compilation overhead on every execution
+const badgePatterns = [
+ // Shields.io badges - [![alt](badge-url)](link-url)
+ /\[\!\[([^\]]*)\]\([^)]*shields\.io[^)]*\)\]\([^)]*\)/g,
+
+ // PyPI badges
+ /\[\!\[PyPI[^\]]*\]\([^)]*pypi\.org[^)]*\)\]\([^)]*\)/g,
+
+ // GitHub workflow/action badges
+ /\[\!\[([^\]]*)\]\([^)]*github\.com[^)]*workflows[^)]*\)\]\([^)]*\)/g,
+ /\[\!\[([^\]]*)\]\([^)]*github\.com[^)]*actions[^)]*\)\]\([^)]*\)/g,
+
+ // License badges
+ /\[\!\[License[^\]]*\]\([^)]*badge[^)]*license[^)]*\)\]\([^)]*\)/g,
+ /\[\!\[([^\]]*license[^\]]*)\]\([^)]*\)\]\([^)]*\)/gi,
+
+ // Version/Release badges
+ /\[\!\[([^\]]*version[^\]]*)\]\([^)]*\)\]\([^)]*\)/gi,
+ /\[\!\[([^\]]*release[^\]]*)\]\([^)]*\)\]\([^)]*\)/gi,
+ /\[\!\[Changelog[^\]]*\]\([^)]*\)\]\([^)]*\)/g,
+
+ // Test/CI badges
+ /\[\!\[([^\]]*test[^\]]*)\]\([^)]*\)\]\([^)]*\)/gi,
+ /\[\!\[([^\]]*build[^\]]*)\]\([^)]*\)\]\([^)]*\)/gi,
+ /\[\!\[([^\]]*ci[^\]]*)\]\([^)]*\)\]\([^)]*\)/gi,
+
+ // Coverage badges
+ /\[\!\[([^\]]*coverage[^\]]*)\]\([^)]*\)\]\([^)]*\)/gi,
+ /\[\!\[([^\]]*codecov[^\]]*)\]\([^)]*\)\]\([^)]*\)/gi,
- // Common badge patterns to remove
- const badgePatterns = [
- // Shield.io badges - [](link-url)
- /\[\!\[([^\]]*)\]\([^)]*shields\.io[^)]*\)\]\([^)]*\)/g,
+ // Documentation badges
+ /\[\!\[([^\]]*docs[^\]]*)\]\([^)]*\)\]\([^)]*\)/gi,
+ /\[\!\[([^\]]*documentation[^\]]*)\]\([^)]*\)\]\([^)]*\)/gi,
- // PyPI badges
- /\[\!\[PyPI[^\]]*\]\([^)]*pypi\.org[^)]*\)\]\([^)]*\)/g,
+ // Download/Install badges
+ /\[\!\[([^\]]*download[^\]]*)\]\([^)]*\)\]\([^)]*\)/gi,
+ /\[\!\[([^\]]*install[^\]]*)\]\([^)]*\)\]\([^)]*\)/gi,
- // GitHub workflow/action badges
- /\[\!\[([^\]]*)\]\([^)]*github\.com[^)]*workflows[^)]*\)\]\([^)]*\)/g,
- /\[\!\[([^\]]*)\]\([^)]*github\.com[^)]*actions[^)]*\)\]\([^)]*\)/g,
+ // Generic img.shields.io badges
+ /\[\!\[([^\]]*)\]\([^)]*img\.shields\.io[^)]*\)\]\([^)]*\)/g,
- // License badges
- /\[\!\[License[^\]]*\]\([^)]*badge[^)]*license[^)]*\)\]\([^)]*\)/g,
- /\[\!\[([^\]]*license[^\]]*)\]\([^)]*\)\]\([^)]*\)/gi,
+ // Standalone shield images without links
+ /\!\[([^\]]*)\]\([^)]*shields\.io[^)]*\)/g,
+ /\!\[([^\]]*)\]\([^)]*img\.shields\.io[^)]*\)/g,
- // Version/Release badges
- /\[\!\[([^\]]*version[^\]]*)\]\([^)]*\)\]\([^)]*\)/gi,
- /\[\!\[([^\]]*release[^\]]*)\]\([^)]*\)\]\([^)]*\)/gi,
- /\[\!\[Changelog[^\]]*\]\([^)]*\)\]\([^)]*\)/g,
+ // Common badge hosting services
+ /\[\!\[([^\]]*)\]\([^)]*badge\.fury\.io[^)]*\)\]\([^)]*\)/g,
+ /\[\!\[([^\]]*)\]\([^)]*badgen\.net[^)]*\)\]\([^)]*\)/g,
+ /\[\!\[([^\]]*)\]\([^)]*flat\.badgen\.net[^)]*\)\]\([^)]*\)/g,
+];
- // Test/CI badges
- /\[\!\[([^\]]*test[^\]]*)\]\([^)]*\)\]\([^)]*\)/gi,
- /\[\!\[([^\]]*build[^\]]*)\]\([^)]*\)\]\([^)]*\)/gi,
- /\[\!\[([^\]]*ci[^\]]*)\]\([^)]*\)\]\([^)]*\)/gi,
+const sectionsToRemove = [
+ // License sections
+ /^#+\s*(License|Licensing)\s*\n[\s\S]*?(?=\n#+|\n\n---|\n\n===|$)/gmi,
- // Coverage badges
- /\[\!\[([^\]]*coverage[^\]]*)\]\([^)]*\)\]\([^)]*\)/gi,
- /\[\!\[([^\]]*codecov[^\]]*)\]\([^)]*\)\]\([^)]*\)/gi,
+ // Code of Conduct sections
+ /^#+\s*(Code of Conduct|Contributor Code of Conduct|Contributing Guidelines)\s*\n[\s\S]*?(?=\n#+|\n\n---|\n\n===|$)/gmi,
- // Documentation badges
- /\[\!\[([^\]]*docs[^\]]*)\]\([^)]*\)\]\([^)]*\)/gi,
- /\[\!\[([^\]]*documentation[^\]]*)\]\([^)]*\)\]\([^)]*\)/gi,
+ // Security sections (often boilerplate)
+ /^#+\s*(Security|Security Policy|Reporting Security Issues)\s*\n[\s\S]*?(?=\n#+|\n\n---|\n\n===|$)/gmi,
- // Download/Install badges
- /\[\!\[([^\]]*download[^\]]*)\]\([^)]*\)\]\([^)]*\)/gi,
- /\[\!\[([^\]]*install[^\]]*)\]\([^)]*\)\]\([^)]*\)/gi,
+ // Changelog sections (version history not relevant for portfolio)
+ /^#+\s*(Changelog|Change Log|Release Notes|Version History)\s*\n[\s\S]*?(?=\n#+|\n\n---|\n\n===|$)/gmi,
- // Generic img.shields.io badges
- /\[\!\[([^\]]*)\]\([^)]*img\.shields\.io[^)]*\)\]\([^)]*\)/g,
+ // Support/Help sections
+ /^#+\s*(Support|Getting Help|Help|Community)\s*\n[\s\S]*?(?=\n#+|\n\n---|\n\n===|$)/gmi,
- // Standalone shield images without links
- /\!\[([^\]]*)\]\([^)]*shields\.io[^)]*\)/g,
- /\!\[([^\]]*)\]\([^)]*img\.shields\.io[^)]*\)/g,
+ // Acknowledgments/Credits (unless very brief)
+ /^#+\s*(Acknowledgments?|Credits?|Thanks?)\s*\n[\s\S]*?(?=\n#+|\n\n---|\n\n===|$)/gmi,
- // Common badge hosting services
- /\[\!\[([^\]]*)\]\([^)]*badge\.fury\.io[^)]*\)\]\([^)]*\)/g,
- /\[\!\[([^\]]*)\]\([^)]*badgen\.net[^)]*\)\]\([^)]*\)/g,
- /\[\!\[([^\]]*)\]\([^)]*flat\.badgen\.net[^)]*\)\]\([^)]*\)/g,
- ];
+ // Detailed contributing sections
+ /^#+\s*(Contributing|How to Contribute|Contribution Guidelines)\s*\n[\s\S]*?(?=\n#+|\n\n---|\n\n===|$)/gmi,
+
+ // FAQ sections (often too detailed for portfolio)
+ /^#+\s*(FAQ|Frequently Asked Questions)\s*\n[\s\S]*?(?=\n#+|\n\n---|\n\n===|$)/gmi,
+
+ // Detailed troubleshooting sections
+ /^#+\s*(Troubleshooting|Common Issues|Known Issues)\s*\n[\s\S]*?(?=\n#+|\n\n---|\n\n===|$)/gmi,
+
+ // Table of contents (not needed for LLM)
+ /^#+\s*(Table of Contents?|Contents?|TOC)\s*\n[\s\S]*?(?=\n#+|\n\n---|\n\n===|$)/gmi,
+];
+
+const prioritySections = [
+ /^#+\s*(Description|About|Overview|What is|Introduction|Summary)/i,
+ /^#+\s*(Features|Key Features|Highlights|What it does)/i,
+ /^#+\s*(Quick Start|Getting Started|Quickstart)/i,
+ /^#+\s*(Installation|Setup|Install)/i,
+ /^#+\s*(Usage|How to use|Examples?|Demo)/i,
+ /^#+\s*(API|Documentation|Docs)/i,
+ /^#+\s*(Architecture|Design|How it works)/i,
+ /^#+\s*(Requirements|Prerequisites|Dependencies)/i,
+ /^#+\s*(Configuration|Config|Settings)/i,
+];
+
+const noisePatterns = [
+ // Remove standalone horizontal rules
+ /^---+$/gm,
+ /^===+$/gm,
+
+ // Remove empty badge sections
+ /^\s*\[!\[.*?\]\(.*?\)\]\(.*?\)\s*$/gm,
+
+ // Remove excessive code block markers without content
+ /```\s*\n\s*```/g,
+
+ // Remove table of contents links
+ /^\s*[-*]\s*\[.*?\]\(#.*?\)\s*$/gm,
+];
+
+/**
+ * Removes common badges and shields from README content
+ * @param readme - Raw README content
+ * @returns Cleaned README content with badges removed
+ */
+export function removeBadges(readme: string): string {
+ if (!readme) return readme;
let cleanedReadme = readme;
@@ -98,38 +157,6 @@ export function cleanReadmeContent(readme: string | null | undefined): string {
* Removes common boilerplate sections that don't add project context
*/
function removeBoilerplateSections(content: string): string {
- const sectionsToRemove = [
- // License sections
- /^#+\s*(License|Licensing)\s*\n[\s\S]*?(?=\n#+|\n\n---|\n\n===|$)/gmi,
-
- // Code of Conduct sections
- /^#+\s*(Code of Conduct|Contributor Code of Conduct|Contributing Guidelines)\s*\n[\s\S]*?(?=\n#+|\n\n---|\n\n===|$)/gmi,
-
- // Security sections (often boilerplate)
- /^#+\s*(Security|Security Policy|Reporting Security Issues)\s*\n[\s\S]*?(?=\n#+|\n\n---|\n\n===|$)/gmi,
-
- // Changelog sections (version history not relevant for portfolio)
- /^#+\s*(Changelog|Change Log|Release Notes|Version History)\s*\n[\s\S]*?(?=\n#+|\n\n---|\n\n===|$)/gmi,
-
- // Support/Help sections
- /^#+\s*(Support|Getting Help|Help|Community)\s*\n[\s\S]*?(?=\n#+|\n\n---|\n\n===|$)/gmi,
-
- // Acknowledgments/Credits (unless very brief)
- /^#+\s*(Acknowledgments?|Credits?|Thanks?)\s*\n[\s\S]*?(?=\n#+|\n\n---|\n\n===|$)/gmi,
-
- // Detailed contributing sections
- /^#+\s*(Contributing|How to Contribute|Contribution Guidelines)\s*\n[\s\S]*?(?=\n#+|\n\n---|\n\n===|$)/gmi,
-
- // FAQ sections (often too detailed for portfolio)
- /^#+\s*(FAQ|Frequently Asked Questions)\s*\n[\s\S]*?(?=\n#+|\n\n---|\n\n===|$)/gmi,
-
- // Detailed troubleshooting sections
- /^#+\s*(Troubleshooting|Common Issues|Known Issues)\s*\n[\s\S]*?(?=\n#+|\n\n---|\n\n===|$)/gmi,
-
- // Table of contents (not needed for LLM)
- /^#+\s*(Table of Contents?|Contents?|TOC)\s*\n[\s\S]*?(?=\n#+|\n\n---|\n\n===|$)/gmi,
- ];
-
let cleaned = content;
sectionsToRemove.forEach(pattern => {
cleaned = cleaned.replace(pattern, '');
@@ -151,19 +178,6 @@ function extractRelevantSections(content: string): string {
relevantSections.push(sections[0]);
}
- // Priority sections that provide project context
- const prioritySections = [
- /^#+\s*(Description|About|Overview|What is|Introduction|Summary)/i,
- /^#+\s*(Features|Key Features|Highlights|What it does)/i,
- /^#+\s*(Quick Start|Getting Started|Quickstart)/i,
- /^#+\s*(Installation|Setup|Install)/i,
- /^#+\s*(Usage|How to use|Examples?|Demo)/i,
- /^#+\s*(API|Documentation|Docs)/i,
- /^#+\s*(Architecture|Design|How it works)/i,
- /^#+\s*(Requirements|Prerequisites|Dependencies)/i,
- /^#+\s*(Configuration|Config|Settings)/i,
- ];
-
// Add sections that match priority patterns
sections.slice(1).forEach(section => {
const sectionHeader = section.split('\n')[0];
@@ -233,22 +247,6 @@ function limitSectionLength(section: string, maxLength: number): string {
*/
function cleanupWhitespace(content: string): string {
let cleaned = content;
-
- // Remove common noise patterns
- const noisePatterns = [
- // Remove standalone horizontal rules
- /^---+$/gm,
- /^===+$/gm,
-
- // Remove empty badge sections
- /^\s*\[!\[.*?\]\(.*?\)\]\(.*?\)\s*$/gm,
-
- // Remove excessive code block markers without content
- /```\s*\n\s*```/g,
-
- // Remove table of contents links
- /^\s*[-*]\s*\[.*?\]\(#.*?\)\s*$/gm,
- ];
noisePatterns.forEach(pattern => {
cleaned = cleaned.replace(pattern, '');
diff --git a/server/lib/rss.ts b/server/lib/rss.ts
index c204866..8af4422 100644
--- a/server/lib/rss.ts
+++ b/server/lib/rss.ts
@@ -1,7 +1,56 @@
import Parser from 'rss-parser';
import { BlogPost } from '../../shared/schema';
+import dns from 'dns';
+import net from 'net';
+
+/**
+ * Reports whether `ip` is a loopback, private, link-local or unspecified
+ * address. Used to block SSRF against internal services.
+ *
+ * Accepts bare IPv4/IPv6 literals and bracketed IPv6 (`[::1]`), which is the
+ * form `URL.hostname` returns. Anything that is not an IP literal (for example
+ * a DNS name) yields `false`.
+ *
+ * @param ip - IP literal to classify.
+ * @returns `true` when the address must not be contacted.
+ */
+const isPrivateIP = (ip: string): boolean => {
+  const cleanIp = ip.replace(/^\[(.*)\]$/, '$1');
+  const ipVersion = net.isIP(cleanIp);
+  if (!ipVersion) return false;
+
+  // IPv4: loopback, private ranges, link-local (includes cloud metadata), unspecified
+  if (ipVersion === 4) {
+    return /^(127\.|10\.|192\.168\.|172\.(1[6-9]|2[0-9]|3[0-1])\.|169\.254\.|0\.0\.0\.0)/.test(cleanIp);
+  }
+
+  // IPv6: loopback (::1), unspecified (::), unique-local (fc00::/7),
+  // link-local (fe80::/10), and IPv4-mapped (::ffff:private)
+  const lowerIp = cleanIp.toLowerCase();
+  if (lowerIp === '::1' || lowerIp === '::') return true;
+  if (/^f[cd]/i.test(lowerIp)) return true; // fc00::/7 unique-local
+  if (/^fe[89ab]/i.test(lowerIp)) return true; // fe80::/10 link-local
+
+  // IPv4-mapped IPv6, dotted form (::ffff:10.x.x.x, etc.)
+  const v4Mapped = cleanIp.match(/::ffff:(\d+\.\d+\.\d+\.\d+)$/i);
+  if (v4Mapped) return isPrivateIP(v4Mapped[1]);
+
+  // IPv4-mapped IPv6, hex form. The URL parser serializes
+  // [::ffff:127.0.0.1] as [::ffff:7f00:1], so without this branch a mapped
+  // loopback/private address in a feed URL is not recognized.
+  const v4MappedHex = lowerIp.match(/^::ffff:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/);
+  if (v4MappedHex) {
+    const high = parseInt(v4MappedHex[1], 16);
+    const low = parseInt(v4MappedHex[2], 16);
+    return isPrivateIP(`${high >> 8}.${high & 0xff}.${low >> 8}.${low & 0xff}`);
+  }
+
+  return false;
+};
+
+/**
+ * DNS lookup wrapper that rejects hostnames resolving to private or local
+ * addresses (as classified by `isPrivateIP`), so a public-looking hostname
+ * cannot be pointed at an internal address.
+ *
+ * @param hostname - Host name to resolve.
+ * @param options - Forwarded to `dns.lookup` unchanged.
+ * @param callback - Receives the `dns.lookup` result, or an error naming the
+ *   blocked address when any resolved address is private.
+ */
+const customLookup = (
+ hostname: string,
+ options: dns.LookupOptions | number,
+ callback: (err: NodeJS.ErrnoException | null, address: string | dns.LookupAddress[], family: number) => void
+) => {
+ dns.lookup(hostname, options as dns.LookupOptions, (err, address, family) => {
+ // Resolution itself failed: forward the error untouched.
+ if (err) return callback(err, address, family);
+
+ // `address` is either a single string or an array of records (presumably
+ // when `options.all` is set - confirm); normalize to an array.
+ const addresses = Array.isArray(address) ? address : [{ address }];
+ // Fail the whole lookup if any resolved address is private.
+ for (const addr of addresses) {
+ if (isPrivateIP(addr.address)) {
+ return callback(new Error(`Access to private IP ${addr.address} is blocked`), address, family);
+ }
+ }
+ callback(err, address, family);
+ });
+};
const parser = new Parser({
+ maxRedirects: 0, // Disallow redirects so that a redirect to an IP literal cannot bypass the dns.lookup check
+ requestOptions: {
+ lookup: customLookup
+ },
customFields: {
item: [
['content:encoded', 'contentEncoded'],
@@ -37,6 +86,12 @@ export interface RSSFeed {
*/
export async function fetchRSSFeed(feedUrl: string): Promise {
try {
+ // Pre-flight check: prevent direct IP access to private network
+ const url = new URL(feedUrl);
+ if (isPrivateIP(url.hostname)) {
+ throw new Error(`Direct access to private network is blocked`);
+ }
+
const feed = await parser.parseURL(feedUrl);
return {
diff --git a/server/lib/security.ts b/server/lib/security.ts
new file mode 100644
index 0000000..e4f306b
--- /dev/null
+++ b/server/lib/security.ts
@@ -0,0 +1,47 @@
+
+/**
+ * Safely stringifies a value for use in a ' };
+ const result = safeJsonStringify(obj);
+ expect(result).toContain('\\u003cscript\\u003e');
+ expect(result).toContain('\\u003c/script\\u003e');
+ expect(result).not.toContain('<script>');
+ });
+
+ it('should escape HTML entities', () => {
+ const obj = { html: '<div>&span</div>' };
+ const result = safeJsonStringify(obj);
+ expect(result).toContain('\\u003cdiv\\u003e');
+ expect(result).toContain('\\u0026span');
+ expect(result).toContain('\\u003c/div\\u003e');
+ });
+
+ it('should be valid JSON when parsed', () => {
+ const obj = {
+ script: '',
+ html: 'bold',
+ amp: 'Fish & Chips'
+ };
+ const result = safeJsonStringify(obj);
+ const parsed = JSON.parse(result);
+ expect(parsed).toEqual(obj);
+ });
+
+ it('should handle arrays', () => {
+ const arr = ['<foo>', '&bar'];
+ const result = safeJsonStringify(arr);
+ expect(result).toContain('\\u003cfoo\\u003e');
+ expect(result).toContain('\\u0026bar');
+ expect(JSON.parse(result)).toEqual(arr);
+ });
+
+ it('should handle complex nested objects', () => {
+ const complex = {
+ a: {
+ b: [
+ { c: '