diff --git a/dist/index.js b/dist/index.js index db73e30..a0bf6fa 100644 --- a/dist/index.js +++ b/dist/index.js @@ -10924,6 +10924,8 @@ module.exports = picomatch; const WIN_SLASH = '\\\\/'; const WIN_NO_SLASH = `[^${WIN_SLASH}]`; +const DEFAULT_MAX_EXTGLOB_RECURSION = 0; + /** * Posix glob regex */ @@ -10990,6 +10992,7 @@ const WINDOWS_CHARS = { */ const POSIX_REGEX_SOURCE = { + __proto__: null, alnum: 'a-zA-Z0-9', alpha: 'a-zA-Z', ascii: '\\x00-\\x7F', @@ -11007,6 +11010,7 @@ const POSIX_REGEX_SOURCE = { }; module.exports = { + DEFAULT_MAX_EXTGLOB_RECURSION, MAX_LENGTH: 1024 * 64, POSIX_REGEX_SOURCE, @@ -11153,6 +11157,277 @@ const syntaxError = (type, char) => { return `Missing ${type}: "${char}" - use "\\\\${char}" to match literal characters`; }; +const splitTopLevel = input => { + const parts = []; + let bracket = 0; + let paren = 0; + let quote = 0; + let value = ''; + let escaped = false; + + for (const ch of input) { + if (escaped === true) { + value += ch; + escaped = false; + continue; + } + + if (ch === '\\') { + value += ch; + escaped = true; + continue; + } + + if (ch === '"') { + quote = quote === 1 ? 0 : 1; + value += ch; + continue; + } + + if (quote === 0) { + if (ch === '[') { + bracket++; + } else if (ch === ']' && bracket > 0) { + bracket--; + } else if (bracket === 0) { + if (ch === '(') { + paren++; + } else if (ch === ')' && paren > 0) { + paren--; + } else if (ch === '|' && paren === 0) { + parts.push(value); + value = ''; + continue; + } + } + } + + value += ch; + } + + parts.push(value); + return parts; +}; + +const isPlainBranch = branch => { + let escaped = false; + + for (const ch of branch) { + if (escaped === true) { + escaped = false; + continue; + } + + if (ch === '\\') { + escaped = true; + continue; + } + + if (/[?*+@!()[\]{}]/.test(ch)) { + return false; + } + } + + return true; +}; + +const normalizeSimpleBranch = branch => { + let value = branch.trim(); + let changed = true; + + while (changed === true) { + changed = false; + + if (/^@\([^\\()[\]{}|]+\)$/.test(value)) { + value = value.slice(2, -1); + changed = true; + } + } + + if (!isPlainBranch(value)) { + return; + } + + return value.replace(/\\(.)/g, '$1'); +}; + +const hasRepeatedCharPrefixOverlap = branches => { + const values = branches.map(normalizeSimpleBranch).filter(Boolean); + + for (let i = 0; i < values.length; i++) { + for (let j = i + 1; j < values.length; j++) { + const a = values[i]; + const b = values[j]; + const char = a[0]; + + if (!char || a !== char.repeat(a.length) || b !== char.repeat(b.length)) { + continue; + } + + if (a === b || a.startsWith(b) || b.startsWith(a)) { + return true; + } + } + } + + return false; +}; + +const parseRepeatedExtglob = (pattern, requireEnd = true) => { + if ((pattern[0] !== '+' && pattern[0] !== '*') || pattern[1] !== '(') { + return; + } + + let bracket = 0; + let paren = 0; + let quote = 0; + let escaped = false; + + for (let i = 1; i < pattern.length; i++) { + const ch = pattern[i]; + + if (escaped === true) { + escaped = false; + continue; + } + + if (ch === '\\') { + escaped = true; + continue; + } + + if (ch === '"') { + quote = quote === 1 ? 0 : 1; + continue; + } + + if (quote === 1) { + continue; + } + + if (ch === '[') { + bracket++; + continue; + } + + if (ch === ']' && bracket > 0) { + bracket--; + continue; + } + + if (bracket > 0) { + continue; + } + + if (ch === '(') { + paren++; + continue; + } + + if (ch === ')') { + paren--; + + if (paren === 0) { + if (requireEnd === true && i !== pattern.length - 1) { + return; + } + + return { + type: pattern[0], + body: pattern.slice(2, i), + end: i + }; + } + } + } +}; + +const getStarExtglobSequenceOutput = pattern => { + let index = 0; + const chars = []; + + while (index < pattern.length) { + const match = parseRepeatedExtglob(pattern.slice(index), false); + + if (!match || match.type !== '*') { + return; + } + + const branches = splitTopLevel(match.body).map(branch => branch.trim()); + if (branches.length !== 1) { + return; + } + + const branch = normalizeSimpleBranch(branches[0]); + if (!branch || branch.length !== 1) { + return; + } + + chars.push(branch); + index += match.end + 1; + } + + if (chars.length < 1) { + return; + } + + const source = chars.length === 1 + ? utils.escapeRegex(chars[0]) + : `[${chars.map(ch => utils.escapeRegex(ch)).join('')}]`; + + return `${source}*`; +}; + +const repeatedExtglobRecursion = pattern => { + let depth = 0; + let value = pattern.trim(); + let match = parseRepeatedExtglob(value); + + while (match) { + depth++; + value = match.body.trim(); + match = parseRepeatedExtglob(value); + } + + return depth; +}; + +const analyzeRepeatedExtglob = (body, options) => { + if (options.maxExtglobRecursion === false) { + return { risky: false }; + } + + const max = + typeof options.maxExtglobRecursion === 'number' + ? options.maxExtglobRecursion + : constants.DEFAULT_MAX_EXTGLOB_RECURSION; + + const branches = splitTopLevel(body).map(branch => branch.trim()); + + if (branches.length > 1) { + if ( + branches.some(branch => branch === '') || + branches.some(branch => /^[*?]+$/.test(branch)) || + hasRepeatedCharPrefixOverlap(branches) + ) { + return { risky: true }; + } + } + + for (const branch of branches) { + const safeOutput = getStarExtglobSequenceOutput(branch); + if (safeOutput) { + return { risky: true, safeOutput }; + } + + if (repeatedExtglobRecursion(branch) > max) { + return { risky: true }; + } + } + + return { risky: false }; +}; + /** * Parse the given input string. * @param {String} input @@ -11333,6 +11608,8 @@ const parse = (input, options) => { token.prev = prev; token.parens = state.parens; token.output = state.output; + token.startIndex = state.index; + token.tokensIndex = tokens.length; const output = (opts.capture ? '(' : '') + token.open; increment('parens'); @@ -11342,6 +11619,34 @@ const parse = (input, options) => { }; const extglobClose = token => { + const literal = input.slice(token.startIndex, state.index + 1); + const body = input.slice(token.startIndex + 2, state.index); + const analysis = analyzeRepeatedExtglob(body, opts); + + if ((token.type === 'plus' || token.type === 'star') && analysis.risky) { + const safeOutput = analysis.safeOutput + ? (token.output ? '' : ONE_CHAR) + (opts.capture ? `(${analysis.safeOutput})` : analysis.safeOutput) + : undefined; + const open = tokens[token.tokensIndex]; + + open.type = 'text'; + open.value = literal; + open.output = safeOutput || utils.escapeRegex(literal); + + for (let i = token.tokensIndex + 1; i < tokens.length; i++) { + tokens[i].value = ''; + tokens[i].output = ''; + delete tokens[i].suffix; + } + + state.output = token.output + open.output; + state.backtrack = true; + + push({ type: 'paren', extglob: true, value, output: '' }); + decrement('parens'); + return; + } + let output = token.close + (opts.capture ? ')' : ''); let rest; @@ -12433,6 +12738,14 @@ picomatch.scan = (input, options) => scan(input, options); * Compile a regular expression from the `state` object returned by the * [parse()](#parse) method. * + * ```js + * const picomatch = require('picomatch'); + * const state = picomatch.parse('*.js'); + * // picomatch.compileRe(state[, options]); + * + * console.log(picomatch.compileRe(state)); + * //=> /^(?:(?!\.)(?=.)[^/]*?\.js)$/ + * ``` * @param {Object} `state` * @param {Object} `options` * @param {Boolean} `returnOutput` Intended for implementors, this argument allows you to return the raw output from the parser. @@ -12468,10 +12781,10 @@ picomatch.compileRe = (state, options, returnOutput = false, returnState = false * * ```js * const picomatch = require('picomatch'); - * const state = picomatch.parse('*.js'); - * // picomatch.compileRe(state[, options]); + * // picomatch.makeRe(state[, options]); * - * console.log(picomatch.compileRe(state)); + * const result = picomatch.makeRe('*.js'); + * console.log(result); * //=> /^(?:(?!\.)(?=.)[^/]*?\.js)$/ * ``` * @param {String} `state` The object returned from the `.parse` method.