turbopack_analyze/
split_chunk.rs

1use std::mem::replace;
2
3use anyhow::Result;
4use bincode::{Decode, Encode};
5use turbo_rcstr::RcStr;
6use turbo_tasks::{FxIndexMap, NonLocalValue, ResolvedVc, ValueToString, Vc, trace::TraceRawVcs};
7use turbo_tasks_fs::{FileContent, FileLine, FileLinesContent, rope::Rope};
8use turbopack_core::{
9    asset::{Asset, AssetContent},
10    output::OutputAsset,
11    source_map::{GenerateSourceMap, OriginalToken, SourceMap, Token},
12};
13
14use crate::compressed_size::compressed_size_bytes;
15
/// A half-open column span `[start_column, end_column)` on a single line of an output asset.
///
/// Ranges are recorded while walking the asset's source map and are later used to extract the
/// covered text (see `ChunkPart::get_compressed_size`).
#[derive(Clone, Debug, Eq, NonLocalValue, PartialEq, TraceRawVcs, Encode, Decode)]
pub struct ChunkPartRange {
    /// Zero-based index of the line within the output asset's lines.
    pub line: u32,
    /// Column on `line` where the range starts (inclusive).
    pub start_column: u32,
    /// Column on `line` where the range ends (exclusive).
    pub end_column: u32,
}
22
/// The share of an output asset that is attributed to one source.
#[derive(Clone, Debug, Eq, NonLocalValue, PartialEq, TraceRawVcs, Encode, Decode)]
pub struct ChunkPart {
    /// Key of the part: the original file named by the source map tokens, or the asset's own
    /// path when the asset could not be split.
    pub source: RcStr,
    /// Size that is directly covered by mappings of this source (sum of the lengths of `ranges`,
    /// or the whole content length for a self-mapped asset).
    pub real_size: u32,
    /// Size of content not covered by any mapping that has been assigned to this source.
    pub unaccounted_size: u32,
    /// The lines of the output asset that `ranges` index into.
    pub lines: ResolvedVc<FileLinesContent>,
    /// The spans of the output asset that are mapped to this source.
    pub ranges: Vec<ChunkPartRange>,
}
31
32impl ChunkPart {
33    pub async fn get_compressed_size(&self) -> Result<u32> {
34        let lines = &*self.lines.await?;
35        let FileLinesContent::Lines(lines) = lines else {
36            return Ok(0);
37        };
38
39        let mut all_range_content = String::new();
40        for range in &self.ranges {
41            append_content_between(
42                range.line,
43                range.start_column,
44                range.line,
45                range.end_column,
46                lines,
47                &mut all_range_content,
48            );
49        }
50        compressed_size_bytes(all_range_content.into())
51    }
52}
53
/// The list of [`ChunkPart`]s an output asset has been split into, as returned by
/// `split_output_asset_into_parts`.
#[turbo_tasks::value(transparent)]
#[derive(Debug)]
pub struct ChunkParts(Vec<ChunkPart>);
57
58#[turbo_tasks::function]
59pub async fn split_output_asset_into_parts(
60    asset: Vc<Box<dyn OutputAsset>>,
61) -> Result<Vc<ChunkParts>> {
62    let content = asset.content().await?;
63    let AssetContent::File(file_content) = &*content else {
64        return Ok(Vc::cell(vec![]));
65    };
66    let FileContent::Content(content) = &*file_content.await? else {
67        return Ok(Vc::cell(vec![]));
68    };
69    let content = content.content();
70    let lines_vc = file_content.lines().to_resolved().await?;
71
72    let Some(generate_source_map) =
73        Vc::try_resolve_sidecast::<Box<dyn GenerateSourceMap>>(asset).await?
74    else {
75        return self_mapped(asset, content, lines_vc).await;
76    };
77    let source_map = generate_source_map.generate_source_map().await?;
78    let Some(source_map) = source_map.as_content() else {
79        return self_mapped(asset, content, lines_vc).await;
80    };
81    let Some(source_map) = SourceMap::new_from_rope(source_map.content())? else {
82        return unaccounted(asset, content, lines_vc).await;
83    };
84
85    let lines = lines_vc.await?;
86    let FileLinesContent::Lines(lines) = &*lines else {
87        return unaccounted(asset, content, lines_vc).await;
88    };
89
90    fn end_of_mapping_column(
91        start_line: u32,
92        end_line: u32,
93        end_column: u32,
94        lines: &[FileLine],
95    ) -> u32 {
96        let start_line = start_line.min(lines.len() as u32 - 1);
97        let line_end = lines[start_line as usize].len() as u32;
98        if start_line == end_line {
99            end_column.min(line_end)
100        } else {
101            line_end
102        }
103    }
104
105    fn len_between(
106        start_line: u32,
107        start_column: u32,
108        end_line: u32,
109        end_column: u32,
110        lines: &[FileLine],
111    ) -> u32 {
112        let start_line = start_line.min(lines.len() as u32 - 1);
113        let end_line = end_line.min(lines.len() as u32 - 1);
114        if start_line == end_line {
115            // TODO: Figure out why start is larger than end sometimes
116            return end_column.saturating_sub(start_column);
117        }
118        let mut len = lines[start_line as usize].len() as u32 - start_column + 1;
119        for line in &lines[start_line as usize + 1..end_line as usize] {
120            len += line.len() as u32 + 1;
121        }
122        len += end_column;
123        len
124    }
125
126    let mut chunk_parts = FxIndexMap::default();
127    fn add_chunk_part_range(
128        source: RcStr,
129        chunk_part_range: ChunkPartRange,
130        size: u32,
131        chunk_parts: &mut FxIndexMap<RcStr, ChunkPart>,
132        lines: ResolvedVc<FileLinesContent>,
133    ) {
134        let entry = chunk_parts
135            .entry(source)
136            .or_insert_with_key(|source| ChunkPart {
137                source: source.clone(),
138                real_size: 0,
139                unaccounted_size: 0,
140                ranges: vec![],
141                lines,
142            });
143        entry.real_size += size;
144        entry.ranges.push(chunk_part_range);
145    }
146
147    fn add_unaccounted_chunk_part(
148        source: RcStr,
149        unaccounted: u32,
150        chunk_parts: &mut FxIndexMap<RcStr, ChunkPart>,
151        lines: ResolvedVc<FileLinesContent>,
152    ) {
153        let entry = chunk_parts
154            .entry(source)
155            .or_insert_with_key(|source| ChunkPart {
156                source: source.clone(),
157                real_size: 0,
158                unaccounted_size: 0,
159                ranges: vec![],
160                lines,
161            });
162        entry.unaccounted_size += unaccounted;
163    }
164
165    fn end_current_mapping(
166        source: RcStr,
167        current_line: u32,
168        start_column: u32,
169        next_line: u32,
170        next_column: u32,
171        lines: &[FileLine],
172        chunk_parts: &mut FxIndexMap<RcStr, ChunkPart>,
173        lines_vc: ResolvedVc<FileLinesContent>,
174    ) -> State {
175        let mapping_end_column = end_of_mapping_column(current_line, next_line, next_column, lines);
176        let len = mapping_end_column.saturating_sub(start_column);
177        add_chunk_part_range(
178            source.clone(),
179            ChunkPartRange {
180                line: current_line,
181                start_column,
182                end_column: mapping_end_column,
183            },
184            len,
185            chunk_parts,
186            lines_vc,
187        );
188        State::AfterMapping {
189            source,
190            generated_line: current_line,
191            current_generated_column: mapping_end_column,
192        }
193    }
194
195    fn should_extend_mapping(
196        state: &State,
197        new_source: &RcStr,
198        new_line: u32,
199        new_column: u32,
200    ) -> bool {
201        if let State::InMapping {
202            source,
203            generated_line,
204            end_column,
205            ..
206        } = state
207        {
208            // Extend if same source and line, and columns are adjacent or overlapping
209            // end_column <= new_column handles both adjacent (equal) and overlapping cases
210            source == new_source && *generated_line == new_line && *end_column <= new_column
211        } else {
212            false
213        }
214    }
215
216    enum State {
217        StartOfFile,
218        InMapping {
219            source: RcStr,
220            generated_line: u32,
221            start_column: u32,
222            end_column: u32,
223        },
224        AfterMapping {
225            source: RcStr,
226            generated_line: u32,
227            current_generated_column: u32,
228        },
229    }
230
231    let mut state: State = State::StartOfFile;
232
233    for token in source_map.tokens() {
234        if let Token::Original(OriginalToken {
235            original_file,
236            generated_line,
237            generated_column,
238            ..
239        }) = token
240        {
241            // Check if we can extend the current mapping
242            if should_extend_mapping(&state, &original_file, generated_line, generated_column) {
243                // Same source and line with adjacent columns - update end to next token position
244                if let State::InMapping {
245                    source,
246                    generated_line: current_line,
247                    start_column,
248                    ..
249                } = state
250                {
251                    state = State::InMapping {
252                        source,
253                        generated_line: current_line,
254                        start_column,
255                        end_column: generated_column,
256                    };
257                    continue;
258                }
259            }
260
261            // End the current mapping if we're in one
262            if let State::InMapping {
263                source,
264                generated_line: current_line,
265                start_column,
266                ..
267            } = state
268            {
269                state = end_current_mapping(
270                    source,
271                    current_line,
272                    start_column,
273                    generated_line,
274                    generated_column,
275                    lines,
276                    &mut chunk_parts,
277                    lines_vc,
278                );
279            }
280
281            // Start a new mapping and put the unaccounted part in between somewhere
282            // Set end_column to start_column initially; it will be updated when we see the next
283            // token
284            match replace(
285                &mut state,
286                State::InMapping {
287                    source: original_file.clone(),
288                    generated_line,
289                    start_column: generated_column,
290                    end_column: generated_column,
291                },
292            ) {
293                State::InMapping { .. } => {
294                    unreachable!();
295                }
296                State::AfterMapping {
297                    source,
298                    generated_line,
299                    current_generated_column,
300                } => {
301                    let len = len_between(
302                        generated_line,
303                        current_generated_column,
304                        generated_line,
305                        generated_column,
306                        lines,
307                    );
308                    let half = len / 2;
309                    add_unaccounted_chunk_part(source, half, &mut chunk_parts, lines_vc);
310                    add_unaccounted_chunk_part(
311                        original_file.clone(),
312                        len - half,
313                        &mut chunk_parts,
314                        lines_vc,
315                    );
316                }
317                State::StartOfFile => {
318                    let len = len_between(0, 0, generated_line, generated_column, lines);
319                    add_unaccounted_chunk_part(
320                        original_file.clone(),
321                        len,
322                        &mut chunk_parts,
323                        lines_vc,
324                    );
325                }
326            }
327        }
328    }
329    let last_line = lines.len() as u32 - 1;
330    let last_column = lines[last_line as usize].len() as u32;
331
332    // End the current token at end of file
333    if let State::InMapping {
334        ref source,
335        generated_line,
336        start_column,
337        ..
338    } = state
339    {
340        state = end_current_mapping(
341            source.clone(),
342            generated_line,
343            start_column,
344            last_line,
345            last_column,
346            lines,
347            &mut chunk_parts,
348            lines_vc,
349        );
350    }
351
352    match state {
353        State::InMapping { .. } => {
354            unreachable!();
355        }
356        State::AfterMapping {
357            source,
358            generated_line,
359            current_generated_column,
360        } => {
361            let len = len_between(
362                generated_line,
363                current_generated_column,
364                last_line,
365                last_column,
366                lines,
367            );
368            add_unaccounted_chunk_part(source, len, &mut chunk_parts, lines_vc);
369        }
370        State::StartOfFile => {
371            return unaccounted(asset, content, lines_vc).await;
372        }
373    }
374
375    Ok(Vc::cell(chunk_parts.into_values().collect()))
376}
377
378async fn self_mapped(
379    asset: Vc<Box<dyn OutputAsset>>,
380    content: &Rope,
381    lines: ResolvedVc<FileLinesContent>,
382) -> Result<Vc<ChunkParts>> {
383    let len = content.len().try_into().unwrap_or(u32::MAX);
384    Ok(Vc::cell(vec![ChunkPart {
385        source: asset.path().to_string().owned().await?,
386        real_size: len,
387        unaccounted_size: 0,
388        ranges: vec![],
389        lines,
390    }]))
391}
392
393async fn unaccounted(
394    asset: Vc<Box<dyn OutputAsset>>,
395    content: &Rope,
396    lines: ResolvedVc<FileLinesContent>,
397) -> Result<Vc<ChunkParts>> {
398    let len = content.len().try_into().unwrap_or(u32::MAX);
399    Ok(Vc::cell(vec![ChunkPart {
400        source: asset.path().to_string().owned().await?,
401        real_size: 0,
402        unaccounted_size: len,
403        ranges: vec![],
404        lines,
405    }]))
406}
407
408fn append_content_between(
409    start_line: u32,
410    start_column: u32,
411    end_line: u32,
412    end_column: u32,
413    lines: &[FileLine],
414    out: &mut String,
415) {
416    let start_line = start_line.min(lines.len() as u32 - 1);
417    let end_line = end_line.min(lines.len() as u32 - 1);
418
419    let start_column = start_column.min(lines[start_line as usize].len() as u32);
420    let end_column = if start_line == end_line {
421        end_column.min(lines[start_line as usize].len() as u32)
422    } else {
423        lines[start_line as usize].len() as u32
424    };
425
426    if end_column <= start_column {
427        return;
428    }
429
430    out.extend(
431        lines[start_line as usize]
432            .content
433            .chars()
434            .skip(start_column as usize)
435            .take((end_column - start_column) as usize),
436    );
437
438    if start_line == end_line {
439        return;
440    }
441
442    for line in &lines[start_line as usize + 1..end_line as usize] {
443        out.push_str(&line.content);
444    }
445
446    out.extend(
447        lines[end_line as usize]
448            .content
449            .chars()
450            .take(end_column as usize),
451    );
452}