1use std::{num::NonZeroUsize, ops::Range, sync::LazyLock};
2
3use phf::phf_set;
4use regex::Regex;
5use regex_automata::{Input, PatternID, meta::Regex as MetaRegex};
6use serde::Deserialize;
7
/// One highlighted region: a half-open byte range plus its token class.
///
/// Offsets are absolute while scanning; after `group_spans_by_line` they
/// are rebased to be relative to the start of the containing line.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct StyleSpan {
    /// Inclusive start byte offset.
    pub start: usize,
    /// Exclusive end byte offset.
    pub end: usize,
    /// Classification that selects the color (see `ColorScheme::color_for_token`).
    pub token_type: TokenType,
}
18
/// Token classes that receive distinct colors; mapped to escape
/// sequences by `ColorScheme::color_for_token`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum TokenType {
    Keyword,
    /// Capitalized identifiers only (likely type/constructor names).
    Identifier,
    /// Quoted strings and template-literal text segments.
    String,
    Number,
    /// Regex literal, e.g. `/foo/gi`.
    Regex,
    Comment,
}
29
/// Source language whose rules the highlighter applies.
/// Deserializes from camelCase names (`"javaScript"`, `"css"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize)]
#[serde(rename_all = "camelCase")]
pub enum Language {
    #[default]
    JavaScript,
    /// CSS gets generic token colors but no keyword set (see `is_keyword`).
    Css,
}
43
44impl Language {
45 pub fn is_keyword(self, ident: &str) -> bool {
47 match self {
48 Language::JavaScript => JS_KEYWORDS.contains(ident),
49 Language::Css => false,
50 }
51 }
52}
53
/// Words highlighted as keywords for JavaScript, including literal
/// keywords (`true`, `false`, `null`, `undefined`) and TypeScript-only
/// words (`type`, `interface`, `implements`, `enum`, …). A perfect-hash
/// set, so lookups in the scanner's identifier hot path are O(1).
static JS_KEYWORDS: phf::Set<&'static str> = phf_set! {
    "as",
    "async",
    "await",
    "break",
    "case",
    "catch",
    "class",
    "const",
    "continue",
    "debugger",
    "default",
    "delete",
    "do",
    "else",
    "enum",
    "export",
    "extends",
    "false",
    "finally",
    "for",
    "from",
    "function",
    "if",
    "implements",
    "import",
    "in",
    "instanceof",
    "interface",
    "let",
    "new",
    "null",
    "of",
    "package",
    "private",
    "protected",
    "public",
    "return",
    "static",
    "super",
    "switch",
    "this",
    "throw",
    "true",
    "try",
    "type",
    "typeof",
    "undefined",
    "var",
    "void",
    "while",
    "with",
    "yield",
};
109
/// The set of ANSI escape sequences used when rendering output.
///
/// Only the first seven fields are consumed by `color_for_token` /
/// `apply_line_highlights` in this file; `gutter`, `marker`, and
/// `message` are presumably read by surrounding rendering code —
/// NOTE(review): confirm against callers.
#[derive(Debug, Clone, Copy)]
pub struct ColorScheme {
    /// Sequence that restores default terminal attributes.
    pub reset: &'static str,
    pub keyword: &'static str,
    pub identifier: &'static str,
    pub string: &'static str,
    pub number: &'static str,
    pub regex: &'static str,
    pub comment: &'static str,
    pub gutter: &'static str,
    pub marker: &'static str,
    pub message: &'static str,
}
124
125impl ColorScheme {
126 pub const fn colored() -> Self {
128 Self {
129 reset: "\x1b[0m",
130 keyword: "\x1b[36m", identifier: "\x1b[33m", string: "\x1b[32m", number: "\x1b[35m", regex: "\x1b[35m", comment: "\x1b[90m", gutter: "\x1b[90m", marker: "\x1b[31m\x1b[1m", message: "\x1b[31m\x1b[1m", }
140 }
141
142 pub const fn plain() -> Self {
144 Self {
145 reset: "",
146 keyword: "",
147 identifier: "",
148 string: "",
149 number: "",
150 regex: "",
151 comment: "",
152 gutter: "",
153 marker: "",
154 message: "",
155 }
156 }
157
158 pub fn color_for_token(&self, token_type: TokenType) -> &'static str {
160 match token_type {
161 TokenType::Keyword => self.keyword,
162 TokenType::Identifier => self.identifier,
163 TokenType::String => self.string,
164 TokenType::Number => self.number,
165 TokenType::Regex => self.regex,
166 TokenType::Comment => self.comment,
167 }
168 }
169}
170
/// Line index over `source`: byte offsets of line starts for a stored
/// window of lines (see `windowed`), so per-line lookups never rescan
/// the text.
pub(crate) struct Lines<'a> {
    source: &'a str,
    /// Start offsets (bytes into `source`) of the stored lines, ascending.
    line_starts: Vec<usize>,
    /// Global index of the line that `line_starts[0]` refers to.
    first_line: usize,
    /// Number of lines counted; a lower bound when indexing stopped
    /// early past the requested window (see `windowed`).
    total_lines: usize,
}
193
impl<'a> Lines<'a> {
    /// Index the whole source with no windowing (tests only).
    #[cfg(test)]
    pub fn new(source: &'a str) -> Self {
        Self::windowed(source, 0, usize::MAX)
    }

    /// Index `source`, storing line-start offsets only for lines in
    /// roughly `window_start - MAX_BACKSCAN_LINES ..= window_end`, so
    /// large files don't pay for lines far outside the viewport.
    ///
    /// Line terminators recognized: `\n`, `\r`, `\r\n`, and the Unicode
    /// separators U+2028/U+2029 (found via their UTF-8 lead byte 0xE2).
    pub fn windowed(source: &'a str, window_start: usize, window_end: usize) -> Self {
        let bytes = source.as_bytes();

        // Keep some lines before the window so the back-scan heuristic
        // in `find_scan_start` has context to work with.
        let store_start = window_start.saturating_sub(MAX_BACKSCAN_LINES);
        let store_end = window_end.saturating_add(1);

        let mut line_starts = Vec::new();
        let mut line_num: usize = 0;
        if store_start == 0 {
            line_starts.push(0);
        }
        line_num += 1;

        for found in memchr::Memchr3::new(b'\n', b'\r', b'\xE2', bytes) {
            let b = bytes[found];
            let line_start = if b == b'\n' {
                found + 1
            } else if b == b'\r' {
                if found + 1 < bytes.len() && bytes[found + 1] == b'\n' {
                    // \r\n: let the following \n produce the line start.
                    continue;
                }
                found + 1
            } else {
                // 0xE2 is the UTF-8 lead byte of U+2028 (E2 80 A8) and
                // U+2029 (E2 80 A9); any other E2-led sequence is not a
                // line separator.
                if found + 2 < bytes.len()
                    && bytes[found + 1] == 0x80
                    && (bytes[found + 2] == 0xA8 || bytes[found + 2] == 0xA9)
                {
                    found + 3
                } else {
                    continue;
                }
            };

            if line_num >= store_end {
                // Past the requested window: stop early. total_lines is
                // then a lower bound (+1 for the line that just started),
                // not an exact count of the whole file.
                return Self {
                    source,
                    line_starts,
                    first_line: store_start,
                    total_lines: line_num + 1,
                };
            }
            if line_num >= store_start {
                line_starts.push(line_start);
            }
            line_num += 1;
        }

        Self {
            source,
            line_starts,
            // Clamp in case the window started past the end of the file.
            first_line: store_start.min(line_num.saturating_sub(1)),
            total_lines: line_num,
        }
    }

    /// Total number of lines counted. `line_num` starts at 1 before the
    /// scan loop, so this is always >= 1 and the unwrap cannot fail.
    pub fn len(&self) -> NonZeroUsize {
        NonZeroUsize::new(self.total_lines).unwrap()
    }

    pub fn source(&self) -> &'a str {
        self.source
    }

    /// Stored line-start byte offsets (indexed locally; see `first_line`).
    pub fn starts(&self) -> &[usize] {
        &self.line_starts
    }

    /// Global index of the first stored line.
    pub fn first_line(&self) -> usize {
        self.first_line
    }

    /// Line `idx` (global index) with its trailing terminator removed.
    pub fn content(&self, idx: usize) -> &'a str {
        let (start, end) = self.byte_bounds(idx);
        let line = &self.source[start..end];
        line.strip_suffix("\r\n")
            .or_else(|| line.strip_suffix('\n'))
            .or_else(|| line.strip_suffix('\r'))
            .or_else(|| line.strip_suffix('\u{2028}'))
            .or_else(|| line.strip_suffix('\u{2029}'))
            .unwrap_or(line)
    }

    /// Byte range of line `idx` (global index), terminator included.
    /// Indices past the stored window collapse to `source.len()`,
    /// yielding an empty range.
    pub fn byte_bounds(&self, idx: usize) -> (usize, usize) {
        let local = idx - self.first_line;
        let start = self
            .line_starts
            .get(local)
            .copied()
            .unwrap_or(self.source.len());
        let end = self
            .line_starts
            .get(local + 1)
            .copied()
            .unwrap_or(self.source.len());
        (start, end)
    }
}
334
/// Map a byte offset to the index of the line containing it.
///
/// `line_starts` must be sorted ascending (as built by `Lines`). An
/// offset exactly at a line start maps to that line; otherwise to the
/// nearest preceding start. Offsets before the first start clamp to 0.
fn lookup_line(line_starts: &[usize], byte_offset: usize) -> usize {
    // partition_point counts the starts <= byte_offset; the containing
    // line is the last of those.
    line_starts
        .partition_point(|&start| start <= byte_offset)
        .saturating_sub(1)
}
342
/// Byte bounds (start inclusive, end exclusive) of the line at
/// `line_idx` in the stored table; indices past the table clamp to
/// `source_len`, producing an empty trailing range.
fn line_bounds(line_starts: &[usize], source_len: usize, line_idx: usize) -> (usize, usize) {
    let offset_at = |idx: usize| line_starts.get(idx).copied().unwrap_or(source_len);
    (offset_at(line_idx), offset_at(line_idx + 1))
}
349
/// Accumulates `StyleSpan`s while tokenizing a byte range of `source`.
struct Scanner<'a> {
    /// Spans produced so far, in ascending byte order (scan order).
    markers: Vec<StyleSpan>,
    /// Line-start offsets, shared with the owning `Lines`.
    line_starts: &'a [usize],
    source: &'a str,
    /// Sorted, disjoint byte ranges that will actually be rendered;
    /// spans falling entirely outside them are discarded.
    output_ranges: Vec<(usize, usize)>,
    language: Language,
}
366
impl<'a> Scanner<'a> {
    fn new(
        line_starts: &'a [usize],
        source: &'a str,
        output_ranges: Vec<(usize, usize)>,
        language: Language,
    ) -> Self {
        Self {
            markers: Vec::new(),
            line_starts,
            source,
            output_ranges,
            language,
        }
    }

    /// End of the last output range — nothing past this can be rendered,
    /// so `scan` stops once a match starts beyond it. 0 when empty.
    fn output_end(&self) -> usize {
        self.output_ranges.last().map_or(0, |r| r.1)
    }

    /// Whether `start..end` intersects any output range. Relies on
    /// `output_ranges` being sorted and disjoint for the early exit.
    #[inline]
    fn overlaps_output(&self, start: usize, end: usize) -> bool {
        for &(rs, re) in &self.output_ranges {
            if rs >= end {
                return false;
            }
            if re > start {
                return true;
            }
        }
        false
    }

    /// Record a span, splitting it at line boundaries so that
    /// `group_spans_by_line` never sees a span crossing lines, and
    /// dropping pieces that fall outside the output ranges.
    fn add_span(&mut self, start: usize, end: usize, token_type: TokenType) {
        if start >= end {
            return;
        }

        if !self.overlaps_output(start, end) {
            return;
        }

        let source_len = self.source.len();
        let start_line = lookup_line(self.line_starts, start);
        // `end` is exclusive, so the last contained byte picks the line.
        let end_line = lookup_line(self.line_starts, end.saturating_sub(1));

        if start_line != end_line {
            // Multi-line span: emit one clipped piece per covered line.
            for line_idx in start_line..=end_line {
                let (line_start, line_end) = line_bounds(self.line_starts, source_len, line_idx);
                let span_start = start.max(line_start);
                let span_end = end.min(line_end);
                if span_start < span_end && self.overlaps_output(span_start, span_end) {
                    self.markers.push(StyleSpan {
                        start: span_start,
                        end: span_end,
                        token_type,
                    });
                }
            }
            return;
        }

        self.markers.push(StyleSpan {
            start,
            end,
            token_type,
        });
    }
}
446
/// How far (in lines) the scanner may back up before the target line
/// when looking for a safe starting point; also bounds how many extra
/// lines `Lines::windowed` stores before the window.
const MAX_BACKSCAN_LINES: usize = 200;

/// Pick the byte offset where tokenization should begin so multi-line
/// constructs (block comments, template literals) opened before the
/// requested line are still recognized.
///
/// Strategy: back up at most `MAX_BACKSCAN_LINES` lines to the nearest
/// blank line; if the back-scan window was clipped without finding one,
/// fall back to offset 0. Then, if the first visible byte is still far
/// ahead, skip forward to just past the last `;` before it.
///
/// NOTE(review): the `;` skip can land inside a block comment whose
/// opener precedes it, losing highlighting for the closer — a known
/// limitation exercised by
/// `test_block_comment_with_blank_line_known_limitation`.
fn find_scan_start(lines: &Lines<'_>, target_line: usize, visible_start: usize) -> usize {
    let mut result = 0;

    if target_line > 0 {
        let first = lines.first_line();
        let search_start = target_line.saturating_sub(MAX_BACKSCAN_LINES).max(first);

        result = 'line: {
            // Walk backwards looking for a blank line to anchor on.
            for line_idx in (search_start..target_line).rev() {
                if lines.content(line_idx).trim().is_empty() {
                    let (start, _) = lines.byte_bounds(line_idx);
                    break 'line start;
                }
            }
            if search_start > first {
                // Back-scan was clipped by MAX_BACKSCAN_LINES without
                // finding a blank line; restart from the very beginning.
                0
            } else {
                let (start, _) = lines.byte_bounds(search_start);
                start
            }
        };
    }

    // Only bother skipping forward when there is a meaningful gap to the
    // visible region.
    const MIN_SKIP_DISTANCE: usize = 200;
    if visible_start > result + MIN_SKIP_DISTANCE {
        let search_from = result;
        let window = &lines.source().as_bytes()[search_from..visible_start];
        if let Some(pos) = window.iter().rposition(|&b| b == b';') {
            result = search_from + pos + 1;
        }
    }

    result
}
510
/// Compute highlight spans for the lines in `line_range` (global line
/// indices, clamped to what `lines` stores).
///
/// `visible_window`, when present, restricts each line to
/// `(truncation_offset, available_width)` bytes so off-screen text is
/// not styled.
///
/// Returns one `Vec<StyleSpan>` per output line, with line-relative
/// byte offsets, ready for `apply_line_highlights`.
pub fn extract_highlights(
    lines: &Lines<'_>,
    line_range: Range<usize>,
    language: Language,
    visible_window: Option<(usize, usize)>,
) -> Vec<Vec<StyleSpan>> {
    let line_starts = lines.starts();
    let first_line = lines.first_line();
    let source = lines.source();
    let local_count = line_starts.len();

    // NOTE(review): assumes line_range.start >= first_line; an earlier
    // line would underflow here — confirm callers uphold this.
    let local_start = line_range.start - first_line;
    let local_end = line_range.end - first_line;

    // One (start, end) byte range per visible line; lines that are empty
    // or fully truncated away are dropped.
    let output_ranges: Vec<(usize, usize)> = (local_start..local_end.min(local_count))
        .filter_map(|local_idx| {
            let ls = line_starts[local_idx];
            let line_end = line_starts
                .get(local_idx + 1)
                .copied()
                .unwrap_or(source.len());
            let (rs, re) = if let Some((trunc_offset, avail_width)) = visible_window {
                (
                    (ls + trunc_offset).min(line_end),
                    (ls + trunc_offset + avail_width).min(line_end),
                )
            } else {
                (ls, line_end)
            };
            if rs < re { Some((rs, re)) } else { None }
        })
        .collect();

    // Start scanning early enough to catch multi-line constructs that
    // open before the first visible byte.
    let visible_start = output_ranges.first().map_or(0, |r| r.0);
    let scan_start = find_scan_start(lines, line_range.start, visible_start);

    let scan_end = output_ranges.last().map_or(source.len(), |r| r.1);
    let mut scanner = Scanner::new(line_starts, source, output_ranges, language);
    scanner.scan(scan_start, scan_end, None);
    let all_spans = scanner.markers;

    debug_assert!(
        all_spans.windows(2).all(|w| w[0].start <= w[1].start),
        "spans should already be sorted by the left-to-right scan"
    );
    debug_assert!(
        all_spans.windows(2).all(|w| w[0].end <= w[1].start),
        "spans should be non-overlapping"
    );
    group_spans_by_line(&all_spans, line_starts, first_line, source, line_range)
}
587
/// Coarse lexical categories recognized by the combined `TOKEN_RE`;
/// `TokenKind::from_pattern_id` maps a matched `PatternID` back to one
/// of these via `TOKEN_RULES`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TokenKind {
    /// Single- or double-quoted string literal.
    String,
    /// Opening backtick of a template literal (body scanned statefully).
    Template,
    /// `// …` to end of line.
    LineComment,
    /// `/* … */`, possibly spanning lines.
    BlockComment,
    /// Numeric literal (hex/octal/binary/decimal, optional exponent).
    Number,
    /// Identifier or keyword (distinguished later via `Language`).
    Ident,
    /// `)` or `]` — value position for regex disambiguation.
    Close,
    /// `(`, `[`, `{`, or `}`; `{`/`}` also drive template brace depth.
    Brace,
    /// `++` or `--`.
    Postfix,
    /// Lone `/`, resolved to regex or division by preceding context.
    Slash,
    /// Any other single-character operator or punctuation.
    Op,
}
607
/// Pattern table compiled into `TOKEN_RE`; each entry's position is its
/// `PatternID` (see `TokenKind::from_pattern_id`).
const TOKEN_RULES: &[(TokenKind, &str)] = &[
    (
        // Quoted strings; the closing quote is optional so an
        // unterminated string still highlights to line end.
        TokenKind::String,
        r#""(?:[^"\\]|\\.)*"?|'(?:[^'\\]|\\.)*'?"#,
    ),
    // Only the opening backtick: template bodies need stateful scanning
    // (see Scanner::scan_template).
    (TokenKind::Template, r"`"),
    (TokenKind::LineComment, r"//[^\n]*"),
    (TokenKind::BlockComment, r"(?s)/\*.*?\*/"),
    (
        TokenKind::Number,
        r"0[xX][\da-fA-F]+|0[oO][0-7]+|0[bB][01]+|(?:\d*\.\d+|\d+\.?)(?:[eE][+-]?\d+)?",
    ),
    // \x80-\xff admits non-ASCII identifier characters.
    (TokenKind::Ident, r"[A-Za-z_$\x80-\xff][\w$\x80-\xff]*"),
    (TokenKind::Close, r"[)\]]"),
    (TokenKind::Brace, r"[(\[{}]"),
    (TokenKind::Postfix, r"\+\+|--"),
    (TokenKind::Slash, r"/"),
    (TokenKind::Op, r"[=+\-*%<>&|^!~?:;,.]"),
];
638
639impl TokenKind {
640 fn from_pattern_id(id: PatternID) -> Self {
641 TOKEN_RULES[id.as_usize()].0
642 }
643}
644
/// Combined multi-pattern tokenizer built from `TOKEN_RULES`; compiled
/// once on first use.
static TOKEN_RE: LazyLock<MetaRegex> = LazyLock::new(|| {
    let patterns: Vec<&str> = TOKEN_RULES.iter().map(|(_, p)| *p).collect();
    MetaRegex::new_many(&patterns).expect("token patterns must compile")
});
653
/// Full shape of a JS regex literal: a non-empty body of plain
/// characters, escapes, or bracket classes, then `/` and optional
/// flags. Used to confirm that a `/` seen in expression position
/// really starts a regex before styling it (see TokenKind::Slash
/// handling in `Scanner::scan`).
static REGEX_LITERAL_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r#"/(?:[^\\/\[\n\r]|\\.|\[(?:[^\]\\\n\r]|\\.)*\])+/[A-Za-z]*"#)
        .expect("regex literal regex must compile")
});
666
impl Scanner<'_> {
    /// Tokenize a template literal whose opening backtick is at
    /// `tpl_start`.
    ///
    /// Quasi (plain-text) segments become `String` spans; `${ … }`
    /// interpolations are handed back to `scan` with a brace depth of 1
    /// so nested templates, strings, and comments inside the expression
    /// are handled normally. Returns the offset just past the closing
    /// backtick, or `scan_end` when the template never closes.
    fn scan_template(&mut self, tpl_start: usize, scan_end: usize) -> usize {
        let bytes = self.source.as_bytes();
        let search_start = tpl_start + 1;

        // Start of the current quasi segment (includes the opening `).
        let mut seg_start = tpl_start;

        // Resume point: matches before `i` were consumed by a nested scan.
        let mut i = search_start;

        let iter = memchr::Memchr2::new(b'`', b'$', &bytes[search_start..scan_end]);
        for found in iter {
            let pos = search_start + found;
            if pos < i {
                continue;
            }

            // An odd number of immediately preceding backslashes means
            // this byte is escaped and not a delimiter.
            let mut backslashes = 0;
            while pos > search_start + backslashes && bytes[pos - 1 - backslashes] == b'\\' {
                backslashes += 1;
            }
            if backslashes % 2 != 0 {
                i = pos + 1;
                continue;
            }

            let b = bytes[pos];
            if b == b'`' {
                // Closing backtick: finish the final quasi segment.
                self.add_span(seg_start, pos + 1, TokenType::String);
                return pos + 1;
            }
            debug_assert_eq!(b, b'$');
            if pos + 1 < scan_end && bytes[pos + 1] == b'{' {
                if pos > seg_start {
                    self.add_span(seg_start, pos, TokenType::String);
                }

                // Scan the interpolation; it returns just past the `}`
                // matching this `${`.
                let expr_start = pos + 2;
                let expr_end = self.scan(expr_start, scan_end, Some(1));

                if expr_end > expr_start && bytes.get(expr_end - 1) == Some(&b'}') {
                    // Include the closing brace in the next quasi segment.
                    seg_start = expr_end - 1;
                } else {
                    // Unterminated expression (hit scan_end).
                    seg_start = expr_end;
                }
                i = expr_end;
                continue;
            }
            // A bare `$` is ordinary template text.
            i = pos + 1;
        }

        // Unterminated template: style the remainder as a string.
        if scan_end > seg_start {
            self.add_span(seg_start, scan_end, TokenType::String);
        }
        scan_end
    }

    /// Tokenize `start_pos..scan_end`, emitting spans via `add_span`.
    ///
    /// When `brace_depth` is `Some`, we are inside a template `${ … }`
    /// interpolation: `{`/`}` are counted and the function returns just
    /// past the `}` closing the interpolation. With `None` it scans to
    /// `scan_end` (or until matches start past the last output byte).
    fn scan(&mut self, start_pos: usize, scan_end: usize, mut brace_depth: Option<u32>) -> usize {
        let mut pos = start_pos;

        // What the previous token was, used to decide whether a `/`
        // starts a regex literal or is the division operator.
        let mut last_token = LastToken::None;

        while let Some(m) = TOKEN_RE.search(&Input::new(self.source).range(pos..scan_end)) {
            let start = m.start();
            let raw_end = m.end();

            // Nothing at or past the end of the output can be rendered.
            if start >= self.output_end() {
                break;
            }

            let end = raw_end.min(scan_end);

            match TokenKind::from_pattern_id(m.pattern()) {
                TokenKind::String => {
                    self.add_span(start, end, TokenType::String);
                    last_token = LastToken::Value;
                }
                TokenKind::Template => {
                    // Template literals get their own stateful scanner.
                    let tpl_end = self.scan_template(start, scan_end);
                    last_token = LastToken::Value;
                    pos = tpl_end;
                    continue;
                }
                TokenKind::LineComment | TokenKind::BlockComment => {
                    self.add_span(start, end, TokenType::Comment);
                }
                TokenKind::Postfix => {
                    last_token = LastToken::PostfixOp;
                }
                TokenKind::Slash => {
                    // `/` is a regex only in expression position; confirm
                    // with the full regex-literal pattern anchored here.
                    if last_token.slash_means_regex()
                        && let Some(re_match) = REGEX_LITERAL_RE.find_at(self.source, start)
                        && re_match.start() == start
                    {
                        let re_end = re_match.end().min(scan_end);
                        self.add_span(start, re_end, TokenType::Regex);
                        last_token = LastToken::Value;
                        pos = re_end;
                        continue;
                    }
                    last_token = LastToken::Operator;
                }
                TokenKind::Close => {
                    last_token = LastToken::CloseBracket;
                }
                TokenKind::Brace => {
                    let ch = self.source.as_bytes()[start];
                    if ch == b'{' {
                        if let Some(ref mut depth) = brace_depth {
                            *depth += 1;
                        }
                    } else if ch == b'}'
                        && let Some(ref mut depth) = brace_depth
                    {
                        if *depth <= 1 {
                            // The `}` matching the `${` that started this
                            // scan: return to the template scanner.
                            return end;
                        }
                        *depth -= 1;
                    }
                    last_token = LastToken::Operator;
                }
                TokenKind::Op => {
                    last_token = LastToken::Operator;
                }
                TokenKind::Number => {
                    self.add_span(start, end, TokenType::Number);
                    last_token = LastToken::Value;
                }
                TokenKind::Ident => {
                    let ident = &self.source[start..end];
                    let token_type = if self.language.is_keyword(ident) {
                        Some(TokenType::Keyword)
                    } else if ident.as_bytes()[0].is_ascii_uppercase() {
                        // Only capitalized identifiers are styled.
                        Some(TokenType::Identifier)
                    } else {
                        None
                    };
                    if let Some(tt) = token_type {
                        self.add_span(start, end, tt);
                    }
                    last_token = LastToken::Value;
                }
            }

            // Guard against an infinite loop if a rule ever matched
            // zero-width.
            assert!(
                raw_end > pos,
                "TOKEN_RE produced a zero-width match at byte {pos}"
            );
            pos = raw_end;
        }

        scan_end
    }
}
876
/// The significance of the most recently scanned token, tracked solely
/// to disambiguate a following `/`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum LastToken {
    /// Start of input — a `/` here begins a regex literal.
    None,
    /// A literal or identifier — a `/` after it is division.
    Value,
    /// `)` or `]` — a `/` after it is division.
    CloseBracket,
    /// `++` or `--` — a `/` after it is division.
    PostfixOp,
    /// Any other operator — a `/` after it begins a regex literal.
    Operator,
}

impl LastToken {
    /// A `/` starts a regex literal only in expression position, i.e.
    /// when nothing value-like immediately precedes it.
    fn slash_means_regex(self) -> bool {
        matches!(self, LastToken::None | LastToken::Operator)
    }
}
902
/// Split the flat, ascending span list into one `Vec` per line of
/// `line_range`, rebasing each span to line-relative byte offsets.
///
/// Relies on `Scanner::add_span` having already split spans at line
/// boundaries, so every span fits entirely inside one line.
fn group_spans_by_line(
    spans: &[StyleSpan],
    line_starts: &[usize],
    first_line: usize,
    source: &str,
    line_range: Range<usize>,
) -> Vec<Vec<StyleSpan>> {
    if source.is_empty() {
        return Vec::new();
    }

    let line_count = first_line + line_starts.len();

    let start_line_idx = line_range.start.min(line_count);
    let end_line_idx = line_range.end.min(line_count);

    let output_line_count = end_line_idx.saturating_sub(start_line_idx);
    let mut line_highlights = Vec::with_capacity(output_line_count);

    // Spans are sorted, so one forward cursor suffices across all lines.
    let mut span_idx = 0;

    for line_idx in start_line_idx..end_line_idx {
        let local_idx = line_idx - first_line;
        let (line_start, line_end) = line_bounds(line_starts, source.len(), local_idx);

        let mut line_spans = Vec::new();

        while span_idx < spans.len() {
            let span = &spans[span_idx];

            if span.start >= line_end {
                break;
            }
            debug_assert!(
                span.start >= line_start,
                "span at {} precedes line start {line_start}",
                span.start
            );

            // Rebase to line-relative offsets.
            line_spans.push(StyleSpan {
                start: span.start - line_start,
                end: span.end - line_start,
                token_type: span.token_type,
            });

            span_idx += 1;
        }

        line_highlights.push(line_spans);
    }

    line_highlights
}
961
/// Wrap the highlighted regions of one rendered line in ANSI colors.
///
/// * `visible_content` — the text being printed: `prefix_len` bytes of
///   decoration followed by the slice of the full line starting at byte
///   `truncation_offset`.
/// * `spans` — line-relative spans from `group_spans_by_line`.
///
/// Spans entirely outside the visible slice are skipped; spans that
/// straddle either edge are clipped to it.
pub fn apply_line_highlights(
    visible_content: &str,
    spans: &[StyleSpan],
    color_scheme: &ColorScheme,
    truncation_offset: usize,
    prefix_len: usize,
) -> String {
    if spans.is_empty() {
        return visible_content.to_string();
    }

    // Last line byte (exclusive, in line coordinates) that is visible.
    let visible_end = truncation_offset + visible_content.len().saturating_sub(prefix_len);

    let mut result = String::with_capacity(visible_content.len() + spans.len() * 10);
    let mut last_offset = 0;

    // Skip spans that end before the visible slice begins.
    let start_idx = spans.partition_point(|s| s.end <= truncation_offset);

    for span in &spans[start_idx..] {
        if span.start >= visible_end {
            break;
        }

        // Translate line-relative offsets into visible_content offsets,
        // clamping at both ends.
        let display_start = (span.start.max(truncation_offset) - truncation_offset + prefix_len)
            .min(visible_content.len());
        let display_end =
            (span.end.min(visible_end) - truncation_offset + prefix_len).min(visible_content.len());

        if display_start < display_end {
            if display_start > last_offset {
                result.push_str(&visible_content[last_offset..display_start]);
            }
            result.push_str(color_scheme.color_for_token(span.token_type));
            result.push_str(&visible_content[display_start..display_end]);
            result.push_str(color_scheme.reset);
            last_offset = display_end;
        }
    }

    // Copy any uncolored tail.
    if last_offset < visible_content.len() {
        result.push_str(&visible_content[last_offset..]);
    }

    result
}
1024
/// Unit tests; `strip_ansi_codes` is `pub` so other test code can
/// reuse it.
#[cfg(test)]
pub mod tests {
    use super::*;

    const JS: Language = Language::JavaScript;

    /// Remove ANSI CSI sequences (`ESC [ … letter`) from `s`.
    pub fn strip_ansi_codes(s: &str) -> String {
        let mut result = String::with_capacity(s.len());
        let mut chars = s.chars();

        while let Some(ch) = chars.next() {
            if ch == '\x1b' {
                if chars.next() == Some('[') {
                    // Skip parameter bytes up to the terminating letter.
                    for ch in chars.by_ref() {
                        if ch.is_alphabetic() {
                            break;
                        }
                    }
                }
            } else {
                result.push(ch);
            }
        }

        result
    }

    #[test]
    fn test_apply_line_highlights_basic() {
        let source = "const Foo = 123";
        let highlights = extract_highlights(&Lines::new(source), 0..usize::MAX, JS, None);
        let color_scheme = ColorScheme::colored();

        let result = apply_line_highlights(source, &highlights[0], &color_scheme, 0, 0);

        assert!(result.contains("\x1b["), "Result should contain ANSI codes");
        assert!(result.contains("const"), "Result should contain 'const'");
        assert!(result.contains("Foo"), "Result should contain 'Foo'");
        assert!(result.contains("123"), "Result should contain '123'");
    }

    #[test]
    fn test_apply_line_highlights_plain() {
        let source = "const foo = 123";
        let highlights = extract_highlights(&Lines::new(source), 0..usize::MAX, JS, None);
        let color_scheme = ColorScheme::plain();

        let result = apply_line_highlights(source, &highlights[0], &color_scheme, 0, 0);
        assert_eq!(result, source);
    }

    #[test]
    fn test_only_capitalized_identifiers_highlighted() {
        let source = "const foo = Bar";
        let highlights = extract_highlights(&Lines::new(source), 0..usize::MAX, JS, None);

        let has_identifier = highlights[0]
            .iter()
            .any(|s| s.token_type == TokenType::Identifier);
        assert!(has_identifier, "Capitalized 'Bar' should be highlighted");

        let ident_starts: Vec<usize> = highlights[0]
            .iter()
            .filter(|s| s.token_type == TokenType::Identifier)
            .map(|s| s.start)
            .collect();
        assert_eq!(
            ident_starts,
            vec![12],
            "Only 'Bar' at offset 12 should be highlighted"
        );
    }

    #[test]
    fn test_strip_ansi_codes() {
        let input = "\x1b[36mconst\x1b[0m foo = \x1b[35m123\x1b[0m";
        let result = strip_ansi_codes(input);
        assert_eq!(result, "const foo = 123");
    }

    #[test]
    fn test_apply_line_highlights_with_truncation() {
        let source = "const Foo = 123";
        let highlights = extract_highlights(&Lines::new(source), 0..usize::MAX, JS, None);
        let color_scheme = ColorScheme::colored();

        // Drop "const " from the front and render the remainder.
        let visible = &source[6..];
        let result = apply_line_highlights(visible, &highlights[0], &color_scheme, 6, 0);

        let stripped = strip_ansi_codes(&result);
        assert_eq!(stripped, "Foo = 123");
        assert!(
            result.contains("\x1b["),
            "Should contain ANSI codes for Foo/123"
        );
    }

    #[test]
    fn test_apply_line_highlights_overlapping_truncation() {
        // Truncation lands in the middle of the string literal.
        let source = r#"const x = "hello world";"#;
        let truncation_offset = 15;
        let highlights = extract_highlights(&Lines::new(source), 0..usize::MAX, JS, None);
        let color_scheme = ColorScheme::colored();

        let visible = &source[truncation_offset..];
        let result =
            apply_line_highlights(visible, &highlights[0], &color_scheme, truncation_offset, 0);

        let stripped = strip_ansi_codes(&result);
        assert_eq!(stripped, visible);
        assert!(
            result.starts_with("\x1b["),
            "Should start with ANSI code for the overlapping string: {result:?}"
        );
    }

    #[test]
    fn test_comments_and_numbers() {
        let source = "const x = 42; // comment\nobj.foo = 10;";
        let highlights = extract_highlights(&Lines::new(source), 0..usize::MAX, JS, None);

        assert_eq!(highlights.len(), 2);

        let line1_has_comment = highlights[0]
            .iter()
            .any(|m| m.token_type == TokenType::Comment);
        assert!(line1_has_comment, "First line should have comment markers");

        let line1_has_number = highlights[0]
            .iter()
            .any(|m| m.token_type == TokenType::Number);
        let line2_has_number = highlights[1]
            .iter()
            .any(|m| m.token_type == TokenType::Number);
        assert!(line1_has_number);
        assert!(line2_has_number);
    }

    #[test]
    fn test_multiline_comment() {
        let source = "const x = 1;\n/* multi\n line */\nconst y = 2;";
        let highlights = extract_highlights(&Lines::new(source), 0..usize::MAX, JS, None);

        assert_eq!(highlights.len(), 4);

        let line2_has_comment = highlights[1]
            .iter()
            .any(|m| m.token_type == TokenType::Comment);
        let line3_has_comment = highlights[2]
            .iter()
            .any(|m| m.token_type == TokenType::Comment);

        assert!(line2_has_comment, "Line 2 should have comment marker");
        assert!(line3_has_comment, "Line 3 should have comment marker");
    }

    #[test]
    fn test_multiline_template_literal() {
        let source = "const x = `line1\nline2\nline3`;";
        let highlights = extract_highlights(&Lines::new(source), 0..usize::MAX, JS, None);

        assert_eq!(highlights.len(), 3);

        for (i, highlight) in highlights.iter().enumerate() {
            let has_string = highlight.iter().any(|m| m.token_type == TokenType::String);
            assert!(
                has_string,
                "Line {} should have string markers for the template literal",
                i + 1
            );
        }
    }

    #[test]
    fn test_template_literal_with_expression() {
        let source = "const x = `hello ${name}!`;";
        let highlights = extract_highlights(&Lines::new(source), 0..usize::MAX, JS, None);

        let string_spans: Vec<(usize, usize)> = highlights[0]
            .iter()
            .filter(|s| s.token_type == TokenType::String)
            .map(|s| (s.start, s.end))
            .collect();

        assert!(
            string_spans.len() >= 2,
            "Should have at least 2 string segments: got {:?}",
            string_spans
        );

        let name_offset = source.find("name").unwrap();
        let name_in_string = highlights[0].iter().any(|s| {
            s.token_type == TokenType::String && s.start <= name_offset && s.end > name_offset
        });
        assert!(
            !name_in_string,
            "'name' should not be marked as part of a string"
        );
    }

    #[test]
    fn test_template_literal_nested() {
        let source = r#"const x = `a ${`b ${c}`} d`;"#;
        let highlights = extract_highlights(&Lines::new(source), 0..usize::MAX, JS, None);

        assert!(!highlights.is_empty());
        let has_string = highlights[0]
            .iter()
            .any(|m| m.token_type == TokenType::String);
        assert!(has_string, "Should have string markers");
    }

    #[test]
    fn test_template_unclosed_expression() {
        let source = "const x = `hello ${name";
        let highlights = extract_highlights(&Lines::new(source), 0..usize::MAX, JS, None);
        assert!(!highlights.is_empty(), "Should produce highlights");

        let has_string = highlights[0]
            .iter()
            .any(|m| m.token_type == TokenType::String);
        assert!(has_string, "Should still mark the string part before ${{");

        let name_offset = source.find("name").unwrap();
        let name_in_string = highlights[0].iter().any(|s| {
            s.token_type == TokenType::String && s.start <= name_offset && s.end > name_offset
        });
        assert!(
            !name_in_string,
            "'name' inside unclosed expression should not be a string"
        );
    }

    #[test]
    fn test_template_brace_in_string_inside_expression() {
        let source = r#"const x = `${ "}" } end`;"#;
        let highlights = extract_highlights(&Lines::new(source), 0..usize::MAX, JS, None);
        assert!(!highlights.is_empty());

        let end_offset = source.find(" end").unwrap();
        let has_end_string = highlights[0].iter().any(|s| {
            s.token_type == TokenType::String && s.start <= end_offset && s.end > end_offset
        });
        assert!(
            has_end_string,
            "String part after expression should be marked"
        );
    }

    #[test]
    fn test_template_empty_expression() {
        let source = "const x = `hello ${}world`;";
        let highlights = extract_highlights(&Lines::new(source), 0..usize::MAX, JS, None);
        assert!(!highlights.is_empty());

        let string_spans: Vec<usize> = highlights[0]
            .iter()
            .filter(|s| s.token_type == TokenType::String)
            .map(|s| s.start)
            .collect();
        assert!(
            string_spans.len() >= 2,
            "Empty expression should still split into two string segments, got {:?}",
            string_spans
        );
    }

    #[test]
    fn test_template_nested_backtick_in_expression() {
        let source = r#"const x = `some${`template`}literal`;"#;
        let highlights = extract_highlights(&Lines::new(source), 0..usize::MAX, JS, None);
        assert!(!highlights.is_empty());

        let literal_offset = source.rfind("literal").unwrap();
        let literal_is_string = highlights[0].iter().any(|s| {
            s.token_type == TokenType::String && s.start <= literal_offset && s.end > literal_offset
        });
        assert!(
            literal_is_string,
            "'literal' should be marked as string (outer template quasi), spans: {:?}",
            highlights[0]
        );

        let template_offset = source.find("template").unwrap();
        let template_is_string = highlights[0].iter().any(|s| {
            s.token_type == TokenType::String
                && s.start <= template_offset
                && s.end > template_offset
        });
        assert!(
            template_is_string,
            "'template' should be marked as string (inner template), spans: {:?}",
            highlights[0]
        );
    }

    #[test]
    fn test_template_block_comment_with_backtick_in_expression() {
        let source = r#"const x = `some${ /* ` */ ""}literal`;"#;
        let highlights = extract_highlights(&Lines::new(source), 0..usize::MAX, JS, None);
        assert!(!highlights.is_empty());

        let comment_offset = source.find("/* ` */").unwrap();
        let comment_is_comment = highlights[0].iter().any(|s| {
            s.token_type == TokenType::Comment
                && s.start <= comment_offset
                && s.end > comment_offset
        });
        assert!(
            comment_is_comment,
            "'/* ` */' should be marked as comment, spans: {:?}",
            highlights[0]
        );

        let literal_offset = source.rfind("literal").unwrap();
        let literal_is_string = highlights[0].iter().any(|s| {
            s.token_type == TokenType::String && s.start <= literal_offset && s.end > literal_offset
        });
        assert!(
            literal_is_string,
            "'literal' should be marked as string, spans: {:?}",
            highlights[0]
        );
    }

    #[test]
    fn test_template_line_comment_with_backtick_in_expression() {
        let source = "const x = `some${ // `\n}literal`;";
        let highlights = extract_highlights(&Lines::new(source), 0..usize::MAX, JS, None);
        assert!(highlights.len() >= 2, "Should have at least 2 lines");

        let line1 = "const x = `some${ // `";
        let comment_offset = line1.find("// `").unwrap();
        let comment_is_comment = highlights[0].iter().any(|s| {
            s.token_type == TokenType::Comment
                && s.start <= comment_offset
                && s.end > comment_offset
        });
        assert!(
            comment_is_comment,
            "'// `' should be marked as comment, spans: {:?}",
            highlights[0]
        );

        let line2 = "}literal`;";
        let literal_offset = line2.find("literal").unwrap();
        let literal_is_string = highlights[1].iter().any(|s| {
            s.token_type == TokenType::String && s.start <= literal_offset && s.end > literal_offset
        });
        assert!(
            literal_is_string,
            "'literal' should be marked as string, spans: {:?}",
            highlights[1]
        );
    }

    #[test]
    fn test_template_string_with_backtick_in_expression() {
        let source = r#"const x = `some${"`"}literal`;"#;
        let highlights = extract_highlights(&Lines::new(source), 0..usize::MAX, JS, None);
        assert!(!highlights.is_empty());

        let inner_str_offset = source.find(r#""`""#).unwrap();
        let inner_is_string = highlights[0].iter().any(|s| {
            s.token_type == TokenType::String
                && s.start <= inner_str_offset
                && s.end > inner_str_offset
        });
        assert!(
            inner_is_string,
            r#"'"`"' should be marked as string, spans: {:?}"#,
            highlights[0]
        );

        let literal_offset = source.rfind("literal").unwrap();
        let literal_is_string = highlights[0].iter().any(|s| {
            s.token_type == TokenType::String && s.start <= literal_offset && s.end > literal_offset
        });
        assert!(
            literal_is_string,
            "'literal' should be marked as string, spans: {:?}",
            highlights[0]
        );
    }

    #[test]
    fn test_line_range_filtering() {
        let source = "const a = 1;\nconst b = 2;\nconst c = 3;\nconst d = 4;\nconst e = 5;";

        let highlights = extract_highlights(&Lines::new(source), 1..4, JS, None);

        assert_eq!(highlights.len(), 3);
        assert!(highlights.iter().all(|h| !h.is_empty()));
    }

    #[test]
    fn test_regex_after_equals() {
        let source = "const re = /foo/gi;";
        let highlights = extract_highlights(&Lines::new(source), 0..usize::MAX, JS, None);

        let has_regex = highlights[0]
            .iter()
            .any(|m| m.token_type == TokenType::Regex);
        assert!(has_regex, "/foo/gi should be highlighted as regex");
    }

    #[test]
    fn test_division_not_regex() {
        let source = "const x = a / b / c;";
        let highlights = extract_highlights(&Lines::new(source), 0..usize::MAX, JS, None);

        let has_regex = highlights[0]
            .iter()
            .any(|m| m.token_type == TokenType::Regex);
        assert!(!has_regex, "a / b / c should not have regex markers");
    }

    #[test]
    fn test_js_keywords_highlighted() {
        let source = "const foo = function() { return true; }";
        let highlights = extract_highlights(&Lines::new(source), 0..usize::MAX, JS, None);

        let keyword_starts: Vec<usize> = highlights[0]
            .iter()
            .filter(|s| s.token_type == TokenType::Keyword)
            .map(|s| s.start)
            .collect();

        assert!(
            keyword_starts.contains(&0),
            "'const' should start at offset 0"
        );
        assert!(
            keyword_starts.contains(&12),
            "'function' should start at offset 12"
        );
        assert!(
            keyword_starts.contains(&25),
            "'return' should start at offset 25"
        );
        assert!(
            keyword_starts.contains(&32),
            "'true' should start at offset 32"
        );
    }

    #[test]
    fn test_css_no_keywords() {
        let source = "const foo = function() { return true; }";
        let highlights =
            extract_highlights(&Lines::new(source), 0..usize::MAX, Language::Css, None);

        let has_keyword = highlights[0]
            .iter()
            .any(|m| m.token_type == TokenType::Keyword);
        assert!(
            !has_keyword,
            "CSS language should not produce keyword markers"
        );
    }

    #[test]
    fn test_block_comment_with_blank_line_known_limitation() {
        // Build enough preceding statements that the `;`-skip heuristic
        // in `find_scan_start` engages and jumps past the `/*` opener.
        let mut source = String::new();
        for i in 0..20 {
            source.push_str(&format!("const x{i} = {i};\n"));
        }
        source.push_str("/** sneaky\n");
        source.push('\n');
        source.push_str("*/\n");
        source.push_str("const after = 1;\n");

        let lines = Lines::new(&source);
        let closer_line_idx = lines.len().get() - 3;

        let highlights = extract_highlights(&lines, closer_line_idx..closer_line_idx + 1, JS, None);
        assert_eq!(highlights.len(), 1);

        let has_comment = highlights[0]
            .iter()
            .any(|m| m.token_type == TokenType::Comment);
        assert!(
            !has_comment,
            "Known limitation: `*/` loses comment highlighting when the skip-scan heuristic \
             starts after the `/*` opener"
        );
    }
}