// Partial port of Python's textwrap module, version 3.9.0 (only the wrap, fill and dedent functions):
|
---|
| 2 | // https://github.com/python/cpython/blob/v3.9.0b4/Lib/textwrap.py
|
---|
| 3 |
|
---|
| 4 | 'use strict'
|
---|
| 5 |
|
---|
| 6 | /*
|
---|
| 7 | * Text wrapping and filling.
|
---|
| 8 | */
|
---|
| 9 |
|
---|
| 10 | // Copyright (C) 1999-2001 Gregory P. Ward.
|
---|
| 11 | // Copyright (C) 2002, 2003 Python Software Foundation.
|
---|
| 12 | // Copyright (C) 2020 argparse.js authors
|
---|
| 13 | // Originally written by Greg Ward <gward@python.net>
|
---|
| 14 |
|
---|
// Hardcode the recognized whitespace characters to the US-ASCII
// whitespace characters.  The main reason for doing this is that
// some Unicode spaces (like \u00a0) are non-breaking whitespaces.
//
// This less funky little regex just splits on recognized spaces. E.g.
//   "Hello there -- you goof-ball, use the -b option!"
// splits into
//   Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
const wordsep_simple_re = /([\t\n\x0b\x0c\r ]+)/

// Column-aware tab expansion: each tab advances to the next multiple of
// 'tabsize'; the column counter restarts after '\n' or '\r'. A tabsize
// that is not greater than zero simply removes the tabs.
function _expandtabs(text, tabsize) {
    if (!text.includes('\t')) {
        return text
    }
    let result = ''
    let column = 0
    for (const ch of text) {
        if (ch === '\t') {
            if (tabsize > 0) {
                const pad = tabsize - (column % tabsize)
                result += ' '.repeat(pad)
                column += pad
            }
        } else {
            result += ch
            column = (ch === '\n' || ch === '\r') ? 0 : column + 1
        }
    }
    return result
}

class TextWrapper {
    /*
     *  Object for wrapping/filling text.  The public interface consists of
     *  the wrap() and fill() methods; the other methods are just there for
     *  subclasses to override in order to tweak the default behaviour.
     *  If you want to completely replace the main wrapping algorithm,
     *  you'll probably have to override _wrap_chunks().
     *
     *  Several instance attributes control various aspects of wrapping:
     *    width (default: 70)
     *      the maximum width of wrapped lines (unless break_long_words
     *      is false)
     *    initial_indent (default: "")
     *      string that will be prepended to the first line of wrapped
     *      output.  Counts towards the line's width.
     *    subsequent_indent (default: "")
     *      string that will be prepended to all lines save the first
     *      of wrapped output; also counts towards each line's width.
     *    expand_tabs (default: true)
     *      Expand tabs in input text to spaces before further processing.
     *      Each tab will become 1 .. 'tabsize' spaces, depending on its
     *      position in its line.  If false, each tab is treated as a
     *      single character.
     *    tabsize (default: 8)
     *      Tab stop distance used when 'expand_tabs' is true.
     *    replace_whitespace (default: true)
     *      Replace all whitespace characters in the input text by spaces
     *      after tab expansion.  Note that if expand_tabs is false and
     *      replace_whitespace is true, every tab will be converted to a
     *      single space!
     *    fix_sentence_endings (default: false)
     *      Stored on the instance, but sentence-ending fixing is not
     *      implemented in this port; the value has no effect.
     *    break_long_words (default: true)
     *      Break words longer than 'width'.  If false, those words will not
     *      be broken, and some lines might be longer than 'width'.
     *    break_on_hyphens (default: true)
     *      Stored on the instance, but _split() in this port only splits
     *      on whitespace; the value has no effect.
     *    drop_whitespace (default: true)
     *      Drop leading and trailing whitespace from lines.
     *    max_lines (default: undefined)
     *      Truncate wrapped lines.
     *    placeholder (default: ' [...]')
     *      Append to the last line of truncated text.
     */

    constructor(options = {}) {
        const {
            width = 70,
            initial_indent = '',
            subsequent_indent = '',
            expand_tabs = true,
            replace_whitespace = true,
            fix_sentence_endings = false,
            break_long_words = true,
            drop_whitespace = true,
            break_on_hyphens = true,
            tabsize = 8,
            max_lines = undefined,
            placeholder = ' [...]'
        } = options

        this.width = width
        this.initial_indent = initial_indent
        this.subsequent_indent = subsequent_indent
        this.expand_tabs = expand_tabs
        this.replace_whitespace = replace_whitespace
        this.fix_sentence_endings = fix_sentence_endings
        this.break_long_words = break_long_words
        this.drop_whitespace = drop_whitespace
        this.break_on_hyphens = break_on_hyphens
        this.tabsize = tabsize
        this.max_lines = max_lines
        this.placeholder = placeholder
    }


    // -- Private methods -----------------------------------------------
    // (possibly useful for subclasses to override)

    _munge_whitespace(text) {
        /*
         *  _munge_whitespace(text : string) -> string
         *
         *  Munge whitespace in text: expand tabs and convert all other
         *  whitespace characters to spaces.  Eg. " foo\\tbar\\n\\nbaz"
         *  becomes " foo    bar  baz".
         */
        if (this.expand_tabs) {
            text = _expandtabs(text, this.tabsize)
        }
        if (this.replace_whitespace) {
            text = text.replace(/[\t\n\x0b\x0c\r]/g, ' ')
        }
        return text
    }

    _split(text) {
        /*
         *  _split(text : string) -> [string]
         *
         *  Split the text to wrap into indivisible chunks.  Chunks are
         *  not quite the same as words; see _wrap_chunks() for full
         *  details.  This port only splits on runs of ASCII whitespace
         *  (break_on_hyphens is not consulted), so the text
         *    Look, goof-ball -- use the -b option!
         *  breaks into the following chunks:
         *    'Look,', ' ', 'goof-ball', ' ', '--', ' ',
         *    'use', ' ', 'the', ' ', '-b', ' ', 'option!'
         */
        // The capture group keeps the separators; empty strings produced
        // at the edges are discarded.
        return text.split(wordsep_simple_re).filter(Boolean)
    }

    _handle_long_word(reversed_chunks, cur_line, cur_len, width) {
        /*
         *  _handle_long_word(chunks : [string],
         *                    cur_line : [string],
         *                    cur_len : int, width : int)
         *
         *  Handle a chunk of text (most likely a word, not whitespace) that
         *  is too long to fit in any line.  Mutates both 'reversed_chunks'
         *  and 'cur_line'; returns nothing.
         */
        // Figure out when indent is larger than the specified width, and make
        // sure at least one character is stripped off on every pass
        const space_left = width < 1 ? 1 : width - cur_len
        const top = reversed_chunks.length - 1

        if (this.break_long_words) {
            // If we're allowed to break long words, then do so: put as much
            // of the next chunk onto the current line as will fit.
            cur_line.push(reversed_chunks[top].slice(0, space_left))
            reversed_chunks[top] = reversed_chunks[top].slice(space_left)
        } else if (cur_line.length === 0) {
            // Otherwise, we have to preserve the long word intact.  Only add
            // it to the current line if there's nothing already there --
            // that minimizes how much we violate the width constraint.
            cur_line.push(reversed_chunks.pop())
        }

        // If we're not allowed to break long words, and there's already
        // text on the current line, do nothing.  Next time through the
        // main loop of _wrap_chunks(), we'll wind up here again, but
        // cur_len will be zero, so the next line will be entirely
        // devoted to the long word that we can't handle right now.
    }

    _wrap_chunks(chunks) {
        /*
         *  _wrap_chunks(chunks : [string]) -> [string]
         *
         *  Wrap a sequence of text chunks and return a list of lines of
         *  length 'self.width' or less.  (If 'break_long_words' is false,
         *  some lines may be longer than this.)  Chunks correspond roughly
         *  to words and the whitespace between them: each chunk is
         *  indivisible (modulo 'break_long_words'), but a line break can
         *  come between any two chunks.  Chunks should not have internal
         *  whitespace; ie. a chunk is either all whitespace or a "word".
         *  Whitespace chunks will be removed from the beginning and end of
         *  lines, but apart from that whitespace is preserved.
         *
         *  The 'chunks' array is reversed in place and consumed.
         *  Throws Error if width <= 0, or if max_lines is set and the
         *  placeholder cannot fit on a line.
         */
        const lines = []
        if (this.width <= 0) {
            throw new Error(`invalid width ${this.width} (must be > 0)`)
        }
        if (this.max_lines !== undefined) {
            const last_indent = this.max_lines > 1 ? this.subsequent_indent : this.initial_indent
            if (last_indent.length + this.placeholder.trimStart().length > this.width) {
                throw new Error('placeholder too large for max width')
            }
        }

        // Arrange in reverse order so items can be efficiently popped
        // from a stack of chunks.
        chunks = chunks.reverse()

        while (chunks.length > 0) {

            // Start the list of chunks that will make up the current line.
            // cur_len is just the length of all the chunks in cur_line.
            const cur_line = []
            let cur_len = 0

            // Figure out which static string will prefix this line.
            const indent = lines.length > 0 ? this.subsequent_indent : this.initial_indent

            // Maximum width for this line.
            const width = this.width - indent.length

            // First chunk on line is whitespace -- drop it, unless this
            // is the very beginning of the text (ie. no lines started yet).
            if (this.drop_whitespace && lines.length > 0 && chunks[chunks.length - 1].trim() === '') {
                chunks.pop()
            }

            // Take chunks for as long as they fit on the current line.
            while (chunks.length > 0) {
                const l = chunks[chunks.length - 1].length
                if (cur_len + l > width) {
                    break
                }
                cur_line.push(chunks.pop())
                cur_len += l
            }

            // The current line is full, and the next chunk is too big to
            // fit on *any* line (not just this one).
            if (chunks.length > 0 && chunks[chunks.length - 1].length > width) {
                this._handle_long_word(chunks, cur_line, cur_len, width)
                cur_len = cur_line.reduce((total, chunk) => total + chunk.length, 0)
            }

            // If the last chunk on this line is all whitespace, drop it.
            if (this.drop_whitespace && cur_line.length > 0 && cur_line[cur_line.length - 1].trim() === '') {
                cur_len -= cur_line[cur_line.length - 1].length
                cur_line.pop()
            }

            // Nothing left for this line (e.g. only whitespace fitted).
            if (cur_line.length === 0) {
                continue
            }

            // The line is emitted as-is when there is no line limit, when
            // the limit is not reached yet, or when this is the final line
            // and the remaining text (if any) is droppable whitespace.
            const only_droppable_left = chunks.length === 0 ||
                (this.drop_whitespace && chunks.length === 1 && !chunks[0].trim())
            const emit_as_is = this.max_lines === undefined ||
                lines.length + 1 < this.max_lines ||
                (only_droppable_left && cur_len <= width)

            if (emit_as_is) {
                // Convert current line back to a string and store it in
                // list of all lines (return value).
                lines.push(indent + cur_line.join(''))
                continue
            }

            // Truncation: drop chunks from the end of the line until the
            // placeholder fits right after a non-whitespace chunk.
            let placed = false
            while (cur_line.length > 0) {
                const last = cur_line[cur_line.length - 1]
                if (last.trim() && cur_len + this.placeholder.length <= width) {
                    cur_line.push(this.placeholder)
                    lines.push(indent + cur_line.join(''))
                    placed = true
                    break
                }
                cur_len -= last.length
                cur_line.pop()
            }

            if (!placed) {
                // Nothing on this line can carry the placeholder: try to
                // append it to the previous line, otherwise emit it alone.
                const prev_line = lines.length > 0 ? lines[lines.length - 1].trimEnd() : undefined
                if (prev_line !== undefined && prev_line.length + this.placeholder.length <= this.width) {
                    lines[lines.length - 1] = prev_line + this.placeholder
                } else {
                    lines.push(indent + this.placeholder.trimStart())
                }
            }
            break
        }

        return lines
    }

    _split_chunks(text) {
        // Normalize whitespace first, then cut the text into chunks.
        text = this._munge_whitespace(text)
        return this._split(text)
    }

    // -- Public interface ----------------------------------------------

    wrap(text) {
        /*
         *  wrap(text : string) -> [string]
         *
         *  Reformat the single paragraph in 'text' so it fits in lines of
         *  no more than 'self.width' columns, and return a list of wrapped
         *  lines.  Tabs in 'text' are expanded to tab stops (when
         *  expand_tabs is true), and all other whitespace characters
         *  (including newline) are converted to space.
         */
        const chunks = this._split_chunks(text)
        // fix_sentence_endings is not implemented in js
        return this._wrap_chunks(chunks)
    }

    fill(text) {
        /*
         *  fill(text : string) -> string
         *
         *  Reformat the single paragraph in 'text' to fit in lines of no
         *  more than 'self.width' columns, and return a new string
         *  containing the entire wrapped paragraph.
         */
        return this.wrap(text).join('\n')
    }
}
|
---|
| 347 |
|
---|
| 348 |
|
---|
| 349 | // -- Convenience interface ---------------------------------------------
|
---|
| 350 |
|
---|
function wrap(text, options = {}) {
    /*
     *  Wrap a single paragraph of text, returning a list of wrapped lines.
     *
     *  Reformat the single paragraph in 'text' so it fits in lines of no
     *  more than 'width' columns (default 70), and return a list of
     *  wrapped lines.  'options' is forwarded to the TextWrapper
     *  constructor; see the TextWrapper class for the available settings.
     */
    const { width = 70, ...kwargs } = options
    const wrapper = new TextWrapper({ width, ...kwargs })
    return wrapper.wrap(text)
}
|
---|
| 366 |
|
---|
function fill(text, options = {}) {
    /*
     *  Fill a single paragraph of text, returning a new string.
     *
     *  Reformat the single paragraph in 'text' to fit in lines of no more
     *  than 'width' columns (default 70), and return a new string
     *  containing the entire wrapped paragraph, lines joined by '\n'.
     *  'options' is forwarded to the TextWrapper constructor; see the
     *  TextWrapper class for the available settings.
     */
    const { width = 70, ...kwargs } = options
    const wrapper = new TextWrapper({ width, ...kwargs })
    return wrapper.fill(text)
}
|
---|
| 381 |
|
---|
| 382 | // -- Loosely related functionality -------------------------------------
|
---|
| 383 |
|
---|
// Lines made up only of spaces/tabs (multiline, global).
const _whitespace_only_re = /^[ \t]+$/mg
// Leading run of spaces/tabs on a line, followed by the first character
// that is not a space, tab or newline (multiline, global).
const _leading_whitespace_re = /(^[ \t]*)(?:[^ \t\n])/mg

function dedent(text) {
    /*
     *  Remove any common leading whitespace from every line in `text`.
     *
     *  This can be used to make triple-quoted strings line up with the left
     *  edge of the display, while still presenting them in the source code
     *  in indented form.
     *
     *  Note that tabs and spaces are both treated as whitespace, but they
     *  are not equal: the lines "  hello" and "\\thello" are
     *  considered to have no common leading whitespace.
     *
     *  Entirely blank lines are normalized to a newline character.
     *
     *  Returns the dedented string.
     */
    // Look for the longest leading string of spaces and tabs common to
    // all lines.
    let margin
    const normalized = text.replace(_whitespace_only_re, '')
    const matches = normalized.match(_leading_whitespace_re) || []

    for (const matched of matches) {
        // With the 'g' flag match() yields whole matches, i.e. the indent
        // plus the one non-whitespace character; drop that character.
        const indent = matched.slice(0, -1)

        if (margin === undefined) {
            margin = indent

        } else if (indent.startsWith(margin)) {
            // Current line more deeply indented than previous winner:
            // no change (previous winner is still on top).

        } else if (margin.startsWith(indent)) {
            // Current line consistent with and no deeper than previous
            // winner: it's the new winner.
            margin = indent

        } else {
            // Find the largest common whitespace between current line and
            // previous winner.
            for (let i = 0; i < margin.length && i < indent.length; i++) {
                if (margin[i] !== indent[i]) {
                    margin = margin.slice(0, i)
                    break
                }
            }
        }
    }

    if (!margin) {
        return normalized
    }
    // margin contains only spaces and tabs, so it is safe to embed in a
    // regular expression without escaping.
    return normalized.replace(new RegExp('^' + margin, 'mg'), '')
}
|
---|
| 439 |
|
---|
// Public API of this module: the convenience functions only.
module.exports = { wrap, fill, dedent }
|
---|