-
-
Notifications
You must be signed in to change notification settings - Fork 496
Add parse::format whitespace formatter to gix-config (#2594)
#2636
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
Amey Pawar (ameyypawar)
wants to merge
1
commit into
GitoxideLabs:main
Choose a base branch
from
ameyypawar:gix-config-formatter
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,193 @@ | ||
| //! Reformat a git-config file with normalized, sanitized whitespace. | ||
| //! | ||
| //! This operates purely on the syntactic [event stream](crate::parse::Events) of a single | ||
| //! file. `include`/`includeIf` directives are *never* resolved here - those are only acted upon | ||
| //! when constructing a [`File`](crate::File) - so the formatter is "flat" by construction. | ||
| //! | ||
| //! Values, comments and section headers are reproduced verbatim; only insignificant whitespace, | ||
| //! newlines and the `=` separator are rewritten according to [`Options`](crate::parse::format::Options). | ||
|
|
||
| use bstr::BString; | ||
|
|
||
| use crate::parse::{self, Event}; | ||
|
|
||
/// How key/value lines beneath a section header are indented.
///
/// The type is `Copy`, just like [`Newline`], as it is nothing more than a tag and a `usize`.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum Indentation {
    /// A single horizontal tab per line - git's de-facto writer style.
    Tab,
    /// The given number of spaces per line.
    Spaces(usize),
    /// No indentation at all.
    None,
}
|
|
||
/// The line terminator written at the end of each line.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Newline {
    /// Reuse the style of the first newline in the input, or `\n` if the input has no newline.
    Detect,
    /// Unconditionally write Unix newlines (`\n`).
    Lf,
    /// Unconditionally write Windows newlines (`\r\n`).
    CrLf,
}
|
|
||
| /// Options controlling [`normalize()`]. | ||
| /// | ||
| /// The defaults are intentionally conservative: they tidy the common sources of noise (stray | ||
| /// indentation, spacing around `=`, trailing whitespace, missing final newline) while leaving | ||
| /// blank lines and the substance of the file untouched. | ||
| /// | ||
| /// Note that trailing whitespace at the end of a line is always removed - it is never significant | ||
| /// in git-config syntax - so there is no option to retain it. | ||
| #[derive(Debug, Clone, Eq, PartialEq)] | ||
| pub struct Options { | ||
| /// How to indent key/value (and comment) lines beneath a section header. | ||
| pub indentation: Indentation, | ||
| /// If `true`, place a single space on each side of the `=` separator (`a = b`); | ||
| /// if `false`, emit a bare `=` (`a=b`). | ||
| pub spaces_around_separator: bool, | ||
| /// Which newline sequence to emit between lines. | ||
| pub newline: Newline, | ||
| /// If `true`, ensure a non-empty file ends with exactly one newline. | ||
| pub ensure_trailing_newline: bool, | ||
| /// If `Some(n)`, cap runs of consecutive blank lines at `n`. `None` (the default) leaves | ||
| /// blank lines exactly as they are. | ||
| pub max_consecutive_blank_lines: Option<usize>, | ||
| } | ||
|
|
||
| impl Default for Options { | ||
| fn default() -> Self { | ||
| Options { | ||
| indentation: Indentation::Spaces(2), | ||
| spaces_around_separator: true, | ||
| newline: Newline::Detect, | ||
| ensure_trailing_newline: true, | ||
| max_consecutive_blank_lines: None, | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /// Parse `input` as a single git-config file and return it with whitespace normalized per `options`. | ||
| /// | ||
| /// Includes are never resolved. Values, comments and section headers are preserved byte-for-byte; | ||
| /// only insignificant whitespace, newlines and the `=` separator are rewritten. | ||
| /// | ||
| /// # Errors | ||
| /// | ||
| /// Returns a [`parse::Error`] if `input` is not a syntactically valid git-config file. | ||
| pub fn normalize(input: &[u8], options: &Options) -> Result<BString, parse::Error> { | ||
| let events = parse::Events::from_bytes(input, None)?.into_vec(); | ||
| Ok(normalize_events(&events, options)) | ||
| } | ||
|
|
||
| fn detect_newline(events: &[Event<'_>]) -> &'static [u8] { | ||
| for event in events { | ||
| if let Event::Newline(n) = event { | ||
| return if n.contains(&b'\r') { b"\r\n" } else { b"\n" }; | ||
| } | ||
| } | ||
| b"\n" | ||
| } | ||
|
|
||
| fn normalize_events(events: &[Event<'_>], opts: &Options) -> BString { | ||
| let newline: &[u8] = match opts.newline { | ||
| Newline::Detect => detect_newline(events), | ||
| Newline::Lf => b"\n", | ||
| Newline::CrLf => b"\r\n", | ||
| }; | ||
| let indent: Vec<u8> = match opts.indentation { | ||
| Indentation::Tab => vec![b'\t'], | ||
| Indentation::Spaces(n) => vec![b' '; n], | ||
| Indentation::None => Vec::new(), | ||
| }; | ||
|
|
||
| let mut out: Vec<u8> = Vec::with_capacity(events.len() * 8); | ||
| let mut in_section = false; | ||
| let mut line_has_content = false; | ||
| let mut i = 0; | ||
|
|
||
| while i < events.len() { | ||
| match &events[i] { | ||
| // Standalone, insignificant whitespace is dropped; we synthesize whitespace | ||
| // deterministically around the structural events below. | ||
| Event::Whitespace(_) => { | ||
| i += 1; | ||
| } | ||
| Event::SectionHeader(_) => { | ||
| events[i].write_to(&mut out).expect("write to Vec is infallible"); | ||
| in_section = true; | ||
| line_has_content = true; | ||
| i += 1; | ||
| } | ||
| Event::SectionValueName(_) => { | ||
| if in_section && !line_has_content { | ||
| out.extend_from_slice(&indent); | ||
| } | ||
| events[i].write_to(&mut out).expect("write to Vec is infallible"); | ||
| line_has_content = true; | ||
| i += 1; | ||
| } | ||
| Event::KeyValueSeparator => { | ||
| if opts.spaces_around_separator { | ||
| out.extend_from_slice(b" = "); | ||
| } else { | ||
| out.push(b'='); | ||
| } | ||
| line_has_content = true; | ||
| i += 1; | ||
| } | ||
| Event::Value(_) | Event::ValueDone(_) => { | ||
| events[i].write_to(&mut out).expect("write to Vec is infallible"); | ||
| line_has_content = true; | ||
| i += 1; | ||
| } | ||
| // A line-continuation span: emit everything verbatim through the closing `ValueDone`, | ||
| // so whitespace that the parser folded into the continued value is never touched. | ||
| Event::ValueNotDone(_) => { | ||
| loop { | ||
| let is_done = matches!(events[i], Event::ValueDone(_)); | ||
| events[i].write_to(&mut out).expect("write to Vec is infallible"); | ||
| i += 1; | ||
| if is_done || i >= events.len() { | ||
| break; | ||
| } | ||
| } | ||
| line_has_content = true; | ||
| } | ||
| Event::Comment(_) => { | ||
| if line_has_content { | ||
| // Inline comment trailing a value/header: one space before the marker. | ||
| out.push(b' '); | ||
| } else if in_section { | ||
| out.extend_from_slice(&indent); | ||
| } | ||
| events[i].write_to(&mut out).expect("write to Vec is infallible"); | ||
| line_has_content = true; | ||
| i += 1; | ||
| } | ||
| Event::Newline(n) => { | ||
| let mut count = n.iter().filter(|&&b| b == b'\n').count(); | ||
| if let Some(max_blank) = opts.max_consecutive_blank_lines { | ||
| // `count` newlines produce `count - 1` blank lines. | ||
| count = count.min(max_blank + 1); | ||
| } | ||
| for _ in 0..count { | ||
| out.extend_from_slice(newline); | ||
| } | ||
| line_has_content = false; | ||
| i += 1; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| if opts.ensure_trailing_newline && !out.is_empty() { | ||
| while out.last() == Some(&b'\n') || out.last() == Some(&b'\r') { | ||
| out.pop(); | ||
| } | ||
| out.extend_from_slice(newline); | ||
| } | ||
|
|
||
| out.into() | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,185 @@ | ||
| use gix_config::parse::{ | ||
| Events, | ||
| format::{self, Indentation, Newline, Options}, | ||
| }; | ||
|
|
||
| fn norm(input: &str) -> String { | ||
| let out = format::normalize(input.as_bytes(), &Options::default()).expect("valid config"); | ||
| String::from_utf8(out.into()).expect("utf8") | ||
| } | ||
|
|
||
| /// Collect (section, name, value) triples from a config's event stream so two configs can be | ||
| /// compared for *meaning* rather than bytes. | ||
| fn semantic_triples(input: &str) -> Vec<(String, String, String)> { | ||
| use gix_config::parse::Event; | ||
| let events = Events::from_str(input).expect("valid").into_vec(); | ||
| let mut out = Vec::new(); | ||
| let mut section = String::new(); | ||
| let mut pending_name: Option<String> = None; | ||
| let mut value = String::new(); | ||
| for ev in &events { | ||
| match ev { | ||
| Event::SectionHeader(h) => section = h.to_bstring().to_string(), | ||
| Event::SectionValueName(_) => { | ||
| if let Some(name) = pending_name.take() { | ||
| out.push((section.clone(), name, std::mem::take(&mut value))); | ||
| } | ||
| pending_name = Some(ev.to_bstr_lossy().to_string()); | ||
| value.clear(); | ||
| } | ||
| Event::Value(_) | Event::ValueDone(_) | Event::ValueNotDone(_) => { | ||
| value.push_str(&ev.to_bstr_lossy().to_string()); | ||
| } | ||
| _ => {} | ||
| } | ||
| } | ||
| if let Some(name) = pending_name.take() { | ||
| out.push((section, name, value)); | ||
| } | ||
| out | ||
| } | ||
|
|
||
#[test]
fn default_policy_basic() {
    // 4-space indent collapses to the 2-space default; trailing whitespace and tight `=` are fixed.
    let input = "[core]\n    editor=vim   \n";
    assert_eq!(norm(input), "[core]\n  editor = vim\n");
}
|
|
||
#[test]
fn meaning_is_preserved() {
    // Each sample must yield the same (section, name, value) triples before and after formatting.
    let samples = [
        "[core]\n editor=vim\n",
        "[remote \"origin\"]\n\turl = https://example.com/x.git\n",
        "[a]\nx=1\ny = 2\n[b]\nz=3\n",
        "[user]\n\tname = A B ; trailing comment\n",
    ];
    for input in samples {
        let after = semantic_triples(&norm(input));
        let before = semantic_triples(input);
        assert_eq!(after, before, "formatting must not change meaning for: {input:?}");
    }
}
|
|
||
#[test]
fn line_continuation_value_is_untouched() {
    // Leading whitespace of a continued line belongs to the value, so it has to come out unchanged.
    let input = "[alias]\nsave = \"!f() { \\\n git status; \\\n}; f\"\n";
    let after = semantic_triples(&norm(input));
    let before = semantic_triples(input);
    assert_eq!(after, before, "continuation value bytes must be preserved");
}
|
|
||
#[test]
fn trailing_backslash_at_eof() {
    // A dangling continuation at the end of the file has to parse, must not panic and must keep
    // its meaning.
    let input = "[core]\na=hello\\";
    let after = semantic_triples(&norm(input));
    let before = semantic_triples(input);
    assert_eq!(after, before);
}
|
|
||
#[test]
fn implicit_boolean_key_keeps_no_separator() {
    // A key without `=` stays that way; it is merely indented by the 2-space default.
    let input = "[core]\n autocrlf\n";
    assert_eq!(norm(input), "[core]\n  autocrlf\n");
}
|
|
||
#[test]
fn comments_are_preserved() {
    // Comments above a section, inside a section and behind a value all have to survive.
    let formatted = norm("; top comment\n[core]\n# inner\n\teditor = vim ; inline\n");
    for comment in ["; top comment", "# inner", "; inline"] {
        assert!(formatted.contains(comment));
    }
}
|
|
||
#[test]
fn quoted_subsection_and_value_verbatim() {
    // Escapes inside a quoted subsection name and a quoted value must not be altered.
    let input = "[test \"sub \\\"x\\\"\"]\n\tpath = \"C:\\\\root\"\n";
    let after = semantic_triples(&norm(input));
    let before = semantic_triples(input);
    assert_eq!(after, before);
}
|
|
||
#[test]
fn crlf_is_detected_and_normalized() {
    // The Windows newlines of the input are detected and kept, and the 2-space default applies.
    let input = "[core]\r\n editor=vim\r\n";
    assert_eq!(norm(input), "[core]\r\n  editor = vim\r\n");
}
|
|
||
#[test]
fn blank_lines_left_alone_by_default() {
    // Without a limit both blank lines stay; keys still receive the 2-space default indentation.
    let input = "[a]\nx = 1\n\n\n[b]\ny = 2\n";
    assert_eq!(norm(input), "[a]\n  x = 1\n\n\n[b]\n  y = 2\n");
}
|
|
||
#[test]
fn blank_lines_collapsed_when_requested() {
    // Three blank lines shrink to the single one allowed; keys get the 2-space default indentation.
    let opts = Options {
        max_consecutive_blank_lines: Some(1),
        ..Options::default()
    };
    let out = format::normalize("[a]\nx = 1\n\n\n\n[b]\ny = 2\n".as_bytes(), &opts).unwrap();
    assert_eq!(
        String::from_utf8(out.into()).unwrap(),
        "[a]\n  x = 1\n\n[b]\n  y = 2\n"
    );
}
|
|
||
#[test]
fn spaces_around_separator_can_be_disabled() {
    // With a bare separator and no indentation, the line shrinks to its minimal form.
    let compact = Options {
        indentation: Indentation::None,
        spaces_around_separator: false,
        ..Options::default()
    };
    let formatted = format::normalize(b"[core]\n editor = vim\n", &compact).unwrap();
    let text = String::from_utf8(formatted.into()).unwrap();
    assert_eq!(text, "[core]\neditor=vim\n");
}
|
|
||
#[test]
fn tab_indentation_option() {
    // Keys beneath a header are indented with exactly one tab.
    let tabbed = Options {
        indentation: Indentation::Tab,
        ..Options::default()
    };
    let formatted = format::normalize(b"[core]\n editor=vim\n", &tabbed).unwrap();
    let text = String::from_utf8(formatted.into()).unwrap();
    assert_eq!(text, "[core]\n\teditor = vim\n");
}
|
|
||
#[test]
fn no_indentation_option() {
    // Keys beneath a header start in the first column.
    let flush_left = Options {
        indentation: Indentation::None,
        ..Options::default()
    };
    let formatted = format::normalize(b"[core]\n editor=vim\n", &flush_left).unwrap();
    let text = String::from_utf8(formatted.into()).unwrap();
    assert_eq!(text, "[core]\neditor = vim\n");
}
|
|
||
#[test]
fn force_lf_newline() {
    // Windows newlines are rewritten to `\n`; the key gets the 2-space default indentation.
    let opts = Options {
        newline: Newline::Lf,
        ..Options::default()
    };
    let out = format::normalize("[core]\r\n editor = vim\r\n".as_bytes(), &opts).unwrap();
    assert_eq!(String::from_utf8(out.into()).unwrap(), "[core]\n  editor = vim\n");
}
|
|
||
#[test]
fn force_crlf_newline() {
    // Unix newlines are rewritten to `\r\n`; the key gets the 2-space default indentation.
    let opts = Options {
        newline: Newline::CrLf,
        ..Options::default()
    };
    let out = format::normalize("[core]\n editor = vim\n".as_bytes(), &opts).unwrap();
    assert_eq!(String::from_utf8(out.into()).unwrap(), "[core]\r\n  editor = vim\r\n");
}
|
|
||
#[test]
fn idempotent() {
    // Formatting already formatted output must not change it any further.
    let first_pass = norm("[core]\n editor=vim\n[remote \"o\"]\nurl = x\n");
    let second_pass = norm(&first_pass);
    assert_eq!(first_pass, second_pass);
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
With the default `max_consecutive_blank_lines: None`, callers are told blank lines are left exactly as-is, but this loop strips every trailing `\r`/`\n` that was already emitted and then appends a single newline. For an input like `[a]\nx = 1\n\n`, normalization silently removes the final blank line even though blank-line collapsing was not requested; `ensure_trailing_newline` should add a newline only when one is missing, or otherwise honor the collapse option explicitly.
Useful? React with 👍 / 👎.