Skip to main content

turbopack_analyze/
split_chunk.rs

1use std::mem::replace;
2
3use anyhow::Result;
4use bincode::{Decode, Encode};
5use turbo_rcstr::RcStr;
6use turbo_tasks::{
7    FxIndexMap, NonLocalValue, ResolvedVc, ValueToString, ValueToStringRef, Vc, trace::TraceRawVcs,
8};
9use turbo_tasks_fs::{FileContent, FileLine, FileLinesContent, FileSystemPath, rope::Rope};
10use turbopack_core::{
11    asset::{Asset, AssetContent},
12    file_source::FileSource,
13    output::OutputAsset,
14    source_map::{GenerateSourceMap, OriginalToken, SourceMap, Token},
15};
16
17use crate::compressed_size::compressed_size_bytes;
18
19#[derive(Clone, Debug, Eq, NonLocalValue, PartialEq, TraceRawVcs, Encode, Decode)]
20pub struct ChunkPartRange {
21    pub line: u32,
22    pub start_column: u32,
23    pub end_column: u32,
24}
25
26#[derive(Clone, Debug, Eq, NonLocalValue, PartialEq, TraceRawVcs, Encode, Decode)]
27pub struct ChunkPart {
28    pub source: RcStr,
29    pub real_size: u32,
30    pub unaccounted_size: u32,
31    pub lines: ResolvedVc<FileLinesContent>,
32    pub ranges: Vec<ChunkPartRange>,
33}
34
35impl ChunkPart {
36    pub async fn get_compressed_size(&self) -> Result<Option<u32>> {
37        let lines = &*self.lines.await?;
38        let FileLinesContent::Lines(lines) = lines else {
39            return Ok(None);
40        };
41
42        if self.ranges.is_empty() {
43            let mut all_content = String::new();
44            for line in lines {
45                all_content.push_str(&line.content);
46            }
47            Ok(Some(compressed_size_bytes(all_content)?))
48        } else {
49            let mut all_range_content = String::new();
50            for range in &self.ranges {
51                append_content_between(
52                    range.line,
53                    range.start_column,
54                    range.line,
55                    range.end_column,
56                    lines,
57                    &mut all_range_content,
58                );
59            }
60            Ok(Some(compressed_size_bytes(all_range_content)?))
61        }
62    }
63}
64
65#[turbo_tasks::value(transparent)]
66#[derive(Debug)]
67pub struct ChunkParts(Vec<ChunkPart>);
68
69#[turbo_tasks::function]
70pub async fn split_traced_file_into_parts(path: FileSystemPath) -> Result<Vc<ChunkParts>> {
71    let source = FileSource::new(path.clone());
72    let content = source.content().await?;
73    let AssetContent::File(file_content) = &*content else {
74        return Ok(Vc::cell(vec![]));
75    };
76    let FileContent::Content(content) = &*file_content.await? else {
77        return Ok(Vc::cell(vec![]));
78    };
79    let content = content.content();
80    let lines_vc = file_content.lines().to_resolved().await?;
81
82    self_mapped(path.to_string_ref().await?, content, lines_vc).await
83}
84
85#[turbo_tasks::function]
86pub async fn split_output_asset_into_parts(
87    asset: Vc<Box<dyn OutputAsset>>,
88) -> Result<Vc<ChunkParts>> {
89    let content = asset.content().await?;
90    let AssetContent::File(file_content) = &*content else {
91        return Ok(Vc::cell(vec![]));
92    };
93    let FileContent::Content(content) = &*file_content.await? else {
94        return Ok(Vc::cell(vec![]));
95    };
96    let content = content.content();
97    let lines_vc = file_content.lines().to_resolved().await?;
98
99    let Some(generate_source_map) =
100        ResolvedVc::try_sidecast::<Box<dyn GenerateSourceMap>>(asset.to_resolved().await?)
101    else {
102        return self_mapped(asset.path().to_string().owned().await?, content, lines_vc).await;
103    };
104    let source_map = generate_source_map.generate_source_map().await?;
105    let Some(source_map) = source_map.as_content() else {
106        return self_mapped(asset.path().to_string().owned().await?, content, lines_vc).await;
107    };
108    let Some(source_map) = SourceMap::new_from_rope(source_map.content())? else {
109        return unaccounted(asset, content, lines_vc).await;
110    };
111
112    let lines = lines_vc.await?;
113    let FileLinesContent::Lines(lines) = &*lines else {
114        return unaccounted(asset, content, lines_vc).await;
115    };
116
117    fn end_of_mapping_column(
118        start_line: u32,
119        end_line: u32,
120        end_column: u32,
121        lines: &[FileLine],
122    ) -> u32 {
123        let start_line = start_line.min(lines.len() as u32 - 1);
124        let line_end = lines[start_line as usize].len() as u32;
125        if start_line == end_line {
126            end_column.min(line_end)
127        } else {
128            line_end
129        }
130    }
131
132    fn len_between(
133        start_line: u32,
134        start_column: u32,
135        end_line: u32,
136        end_column: u32,
137        lines: &[FileLine],
138    ) -> u32 {
139        let start_line = start_line.min(lines.len() as u32 - 1);
140        let end_line = end_line.min(lines.len() as u32 - 1);
141        if start_line == end_line {
142            // TODO: Figure out why start is larger than end sometimes
143            return end_column.saturating_sub(start_column);
144        }
145        let mut len = lines[start_line as usize].len() as u32 - start_column + 1;
146        for line in &lines[start_line as usize + 1..end_line as usize] {
147            len += line.len() as u32 + 1;
148        }
149        len += end_column;
150        len
151    }
152
153    let mut chunk_parts = FxIndexMap::default();
154    fn add_chunk_part_range(
155        source: RcStr,
156        chunk_part_range: ChunkPartRange,
157        size: u32,
158        chunk_parts: &mut FxIndexMap<RcStr, ChunkPart>,
159        lines: ResolvedVc<FileLinesContent>,
160    ) {
161        let entry = chunk_parts
162            .entry(source)
163            .or_insert_with_key(|source| ChunkPart {
164                source: source.clone(),
165                real_size: 0,
166                unaccounted_size: 0,
167                ranges: vec![],
168                lines,
169            });
170        entry.real_size += size;
171        entry.ranges.push(chunk_part_range);
172    }
173
174    fn add_unaccounted_chunk_part(
175        source: RcStr,
176        unaccounted: u32,
177        chunk_parts: &mut FxIndexMap<RcStr, ChunkPart>,
178        lines: ResolvedVc<FileLinesContent>,
179    ) {
180        let entry = chunk_parts
181            .entry(source)
182            .or_insert_with_key(|source| ChunkPart {
183                source: source.clone(),
184                real_size: 0,
185                unaccounted_size: 0,
186                ranges: vec![],
187                lines,
188            });
189        entry.unaccounted_size += unaccounted;
190    }
191
192    fn end_current_mapping(
193        source: RcStr,
194        current_line: u32,
195        start_column: u32,
196        next_line: u32,
197        next_column: u32,
198        lines: &[FileLine],
199        chunk_parts: &mut FxIndexMap<RcStr, ChunkPart>,
200        lines_vc: ResolvedVc<FileLinesContent>,
201    ) -> State {
202        let mapping_end_column = end_of_mapping_column(current_line, next_line, next_column, lines);
203        let len = mapping_end_column.saturating_sub(start_column);
204        add_chunk_part_range(
205            source.clone(),
206            ChunkPartRange {
207                line: current_line,
208                start_column,
209                end_column: mapping_end_column,
210            },
211            len,
212            chunk_parts,
213            lines_vc,
214        );
215        State::AfterMapping {
216            source,
217            generated_line: current_line,
218            current_generated_column: mapping_end_column,
219        }
220    }
221
222    fn should_extend_mapping(
223        state: &State,
224        new_source: &RcStr,
225        new_line: u32,
226        new_column: u32,
227    ) -> bool {
228        if let State::InMapping {
229            source,
230            generated_line,
231            end_column,
232            ..
233        } = state
234        {
235            // Extend if same source and line, and columns are adjacent or overlapping
236            // end_column <= new_column handles both adjacent (equal) and overlapping cases
237            source == new_source && *generated_line == new_line && *end_column <= new_column
238        } else {
239            false
240        }
241    }
242
243    enum State {
244        StartOfFile,
245        InMapping {
246            source: RcStr,
247            generated_line: u32,
248            start_column: u32,
249            end_column: u32,
250        },
251        AfterMapping {
252            source: RcStr,
253            generated_line: u32,
254            current_generated_column: u32,
255        },
256    }
257
258    let mut state: State = State::StartOfFile;
259
260    for token in source_map.tokens() {
261        if let Token::Original(OriginalToken {
262            original_file,
263            generated_line,
264            generated_column,
265            ..
266        }) = token
267        {
268            // Check if we can extend the current mapping
269            if should_extend_mapping(&state, &original_file, generated_line, generated_column) {
270                // Same source and line with adjacent columns - update end to next token position
271                if let State::InMapping {
272                    source,
273                    generated_line: current_line,
274                    start_column,
275                    ..
276                } = state
277                {
278                    state = State::InMapping {
279                        source,
280                        generated_line: current_line,
281                        start_column,
282                        end_column: generated_column,
283                    };
284                    continue;
285                }
286            }
287
288            // End the current mapping if we're in one
289            if let State::InMapping {
290                source,
291                generated_line: current_line,
292                start_column,
293                ..
294            } = state
295            {
296                state = end_current_mapping(
297                    source,
298                    current_line,
299                    start_column,
300                    generated_line,
301                    generated_column,
302                    lines,
303                    &mut chunk_parts,
304                    lines_vc,
305                );
306            }
307
308            // Start a new mapping and put the unaccounted part in between somewhere
309            // Set end_column to start_column initially; it will be updated when we see the next
310            // token
311            match replace(
312                &mut state,
313                State::InMapping {
314                    source: original_file.clone(),
315                    generated_line,
316                    start_column: generated_column,
317                    end_column: generated_column,
318                },
319            ) {
320                State::InMapping { .. } => {
321                    unreachable!();
322                }
323                State::AfterMapping {
324                    source,
325                    generated_line,
326                    current_generated_column,
327                } => {
328                    let len = len_between(
329                        generated_line,
330                        current_generated_column,
331                        generated_line,
332                        generated_column,
333                        lines,
334                    );
335                    let half = len / 2;
336                    add_unaccounted_chunk_part(source, half, &mut chunk_parts, lines_vc);
337                    add_unaccounted_chunk_part(
338                        original_file.clone(),
339                        len - half,
340                        &mut chunk_parts,
341                        lines_vc,
342                    );
343                }
344                State::StartOfFile => {
345                    let len = len_between(0, 0, generated_line, generated_column, lines);
346                    add_unaccounted_chunk_part(
347                        original_file.clone(),
348                        len,
349                        &mut chunk_parts,
350                        lines_vc,
351                    );
352                }
353            }
354        }
355    }
356    let last_line = lines.len() as u32 - 1;
357    let last_column = lines[last_line as usize].len() as u32;
358
359    // End the current token at end of file
360    if let State::InMapping {
361        ref source,
362        generated_line,
363        start_column,
364        ..
365    } = state
366    {
367        state = end_current_mapping(
368            source.clone(),
369            generated_line,
370            start_column,
371            last_line,
372            last_column,
373            lines,
374            &mut chunk_parts,
375            lines_vc,
376        );
377    }
378
379    match state {
380        State::InMapping { .. } => {
381            unreachable!();
382        }
383        State::AfterMapping {
384            source,
385            generated_line,
386            current_generated_column,
387        } => {
388            let len = len_between(
389                generated_line,
390                current_generated_column,
391                last_line,
392                last_column,
393                lines,
394            );
395            add_unaccounted_chunk_part(source, len, &mut chunk_parts, lines_vc);
396        }
397        State::StartOfFile => {
398            return unaccounted(asset, content, lines_vc).await;
399        }
400    }
401
402    Ok(Vc::cell(chunk_parts.into_values().collect()))
403}
404
405pub async fn self_mapped(
406    path: RcStr,
407    content: &Rope,
408    lines: ResolvedVc<FileLinesContent>,
409) -> Result<Vc<ChunkParts>> {
410    let len = content.len().try_into().unwrap_or(u32::MAX);
411    Ok(Vc::cell(vec![ChunkPart {
412        source: path,
413        real_size: len,
414        unaccounted_size: 0,
415        ranges: vec![],
416        lines,
417    }]))
418}
419
420async fn unaccounted(
421    asset: Vc<Box<dyn OutputAsset>>,
422    content: &Rope,
423    lines: ResolvedVc<FileLinesContent>,
424) -> Result<Vc<ChunkParts>> {
425    let len = content.len().try_into().unwrap_or(u32::MAX);
426    Ok(Vc::cell(vec![ChunkPart {
427        source: asset.path().to_string().owned().await?,
428        real_size: 0,
429        unaccounted_size: len,
430        ranges: vec![],
431        lines,
432    }]))
433}
434
435fn append_content_between(
436    start_line: u32,
437    start_column: u32,
438    end_line: u32,
439    end_column: u32,
440    lines: &[FileLine],
441    out: &mut String,
442) {
443    let start_line = start_line.min(lines.len() as u32 - 1);
444    let end_line = end_line.min(lines.len() as u32 - 1);
445
446    let start_column = start_column.min(lines[start_line as usize].len() as u32);
447    let end_column = if start_line == end_line {
448        end_column.min(lines[start_line as usize].len() as u32)
449    } else {
450        lines[start_line as usize].len() as u32
451    };
452
453    if end_column <= start_column {
454        return;
455    }
456
457    out.extend(
458        lines[start_line as usize]
459            .content
460            .chars()
461            .skip(start_column as usize)
462            .take((end_column - start_column) as usize),
463    );
464
465    if start_line == end_line {
466        return;
467    }
468
469    for line in &lines[start_line as usize + 1..end_line as usize] {
470        out.push_str(&line.content);
471    }
472
473    out.extend(
474        lines[end_line as usize]
475            .content
476            .chars()
477            .take(end_column as usize),
478    );
479}