physis/
exd_file_operations.rs

1// SPDX-FileCopyrightText: 2025 Joshua Goins <josh@redstrate.com>
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4use std::io::{Read, Seek, SeekFrom, Write};
5
6use binrw::{BinRead, BinResult, BinWrite, Endian};
7
8use crate::{
9    exd::{
10        ColumnData, DataSection, EXD, EXDHeader, ExcelDataOffset, ExcelRow, ExcelRowKind,
11        ExcelSingleRow,
12    },
13    exh::{ColumnDataType, EXH, ExcelColumnDefinition},
14};
15
16#[binrw::parser(reader)]
17pub fn read_data_sections(header: &EXDHeader) -> BinResult<Vec<DataSection>> {
18    let mut rows = Vec::new();
19
20    // we have to do this annoying thing because they specified it in bytes,
21    // not an actual count of data sections
22    let begin_pos = reader.stream_position().unwrap();
23    loop {
24        let current_pos = reader.stream_position().unwrap();
25        if current_pos - begin_pos >= header.data_section_size as u64 {
26            break;
27        }
28
29        let data_section = DataSection::read_be(reader).unwrap();
30        rows.push(data_section);
31    }
32
33    Ok(rows)
34}
35
36#[binrw::parser(reader)]
37pub fn parse_rows(exh: &EXH, data_offsets: &Vec<ExcelDataOffset>) -> BinResult<Vec<ExcelRow>> {
38    let mut rows = Vec::new();
39
40    for offset in data_offsets {
41        reader.seek(SeekFrom::Start(offset.offset.into()))?;
42
43        // TODO: use DataSection here
44        let _size: u32 = u32::read_be(reader).unwrap();
45        let row_count: u16 = u16::read_be(reader).unwrap();
46        //let row_header = DataSection::read(reader)?;
47
48        let data_offset = reader.stream_position().unwrap() as u32;
49
50        let mut read_row = |row_offset: u32| -> Option<ExcelSingleRow> {
51            let mut subrow = ExcelSingleRow {
52                columns: Vec::with_capacity(exh.column_definitions.len()),
53            };
54
55            for column in &exh.column_definitions {
56                reader
57                    .seek(SeekFrom::Start((row_offset + column.offset as u32).into()))
58                    .ok()?;
59
60                subrow
61                    .columns
62                    .push(EXD::read_column(reader, exh, row_offset, column).unwrap());
63            }
64
65            Some(subrow)
66        };
67
68        let new_row = if row_count > 1 {
69            let mut rows = Vec::new();
70            for i in 0..row_count {
71                let subrow_offset = data_offset + (i * exh.header.data_offset + 2 * (i + 1)) as u32;
72
73                rows.push(read_row(subrow_offset).unwrap());
74            }
75            ExcelRowKind::SubRows(rows)
76        } else {
77            ExcelRowKind::SingleRow(read_row(data_offset).unwrap())
78        };
79        rows.push(ExcelRow {
80            row_id: offset.row_id,
81            kind: new_row,
82        });
83    }
84
85    Ok(rows)
86}
87
88#[binrw::writer(writer)]
89pub fn write_rows(rows: &Vec<ExcelRow>, exh: &EXH) -> BinResult<()> {
90    // seek past the data offsets, which we will write later
91    let data_offsets_pos = writer.stream_position().unwrap();
92    writer
93        .seek(SeekFrom::Current(
94            (core::mem::size_of::<ExcelDataOffset>() * rows.len()) as i64,
95        ))
96        .unwrap();
97
98    let mut data_offsets = Vec::new();
99
100    for row in rows {
101        data_offsets.push(ExcelDataOffset {
102            row_id: row.row_id,
103            offset: writer.stream_position().unwrap() as u32,
104        });
105
106        // skip row header for now, because we don't know the size yet!
107        let row_header_pos = writer.stream_position().unwrap();
108
109        writer.seek(SeekFrom::Current(6)).unwrap(); // u32 + u16
110
111        let old_pos = writer.stream_position().unwrap();
112
113        // write column data
114        {
115            let mut write_row = |row: &ExcelSingleRow| {
116                let mut column_definitions: Vec<(ExcelColumnDefinition, ColumnData)> = exh
117                    .column_definitions
118                    .clone()
119                    .into_iter()
120                    .zip(row.columns.clone().into_iter())
121                    .collect::<Vec<_>>();
122
123                // we need to sort them by offset
124                column_definitions.sort_by(|(a, _), (b, _)| a.offset.cmp(&b.offset));
125
126                let mut strings_len = 0;
127                for (definition, column) in &column_definitions {
128                    EXD::write_column(writer, column, definition, &mut strings_len);
129
130                    // TODO: temporary workaround until i can figure out why it has 4 extra bytes in test_write's case
131                    if definition.data_type == ColumnDataType::Int8 && column_definitions.len() == 1
132                    {
133                        0u32.write_le(writer).unwrap();
134                    }
135                }
136
137                // handle packed bools
138                let mut packed_byte = 0u8;
139                let mut byte_offset = 0;
140
141                let mut write_packed_bool =
142                    |definition: &ExcelColumnDefinition, shift: i32, boolean: &bool| {
143                        byte_offset = definition.offset; // NOTE: it looks like there is only one byte for all of the packed booleans
144
145                        if *boolean {
146                            let bit = 1 << shift;
147                            packed_byte |= bit;
148                        }
149                    };
150
151                for (definition, column) in &column_definitions {
152                    match &column {
153                        ColumnData::Bool(val) => match definition.data_type {
154                            ColumnDataType::PackedBool0 => write_packed_bool(definition, 0, val),
155                            ColumnDataType::PackedBool1 => write_packed_bool(definition, 1, val),
156                            ColumnDataType::PackedBool2 => write_packed_bool(definition, 2, val),
157                            ColumnDataType::PackedBool3 => write_packed_bool(definition, 3, val),
158                            ColumnDataType::PackedBool4 => write_packed_bool(definition, 4, val),
159                            ColumnDataType::PackedBool5 => write_packed_bool(definition, 5, val),
160                            ColumnDataType::PackedBool6 => write_packed_bool(definition, 6, val),
161                            ColumnDataType::PackedBool7 => write_packed_bool(definition, 7, val),
162                            _ => {} // not relevant
163                        },
164                        _ => {} // not relevant
165                    }
166                }
167
168                // write the new packed boolean byte
169                // NOTE: This is a terrible way to check if there are packed booleans
170                // NOTE: Assumption: the packed boolean is always at the end of the row
171                if byte_offset != 0 {
172                    packed_byte.write_le(writer).unwrap();
173                }
174            };
175
176            match &row.kind {
177                ExcelRowKind::SingleRow(excel_single_row) => write_row(excel_single_row),
178                ExcelRowKind::SubRows(excel_single_rows) => {
179                    for row in excel_single_rows {
180                        write_row(row);
181                    }
182                }
183            }
184        }
185
186        // write strings at the end of column data
187        {
188            let mut write_row_strings = |row: &ExcelSingleRow| {
189                for column in &row.columns {
190                    if let ColumnData::String(val) = column {
191                        let bytes = val.as_bytes();
192                        bytes.write(writer).unwrap();
193
194                        // nul terminator
195                        0u8.write_le(writer).unwrap();
196                    }
197                }
198            };
199
200            match &row.kind {
201                ExcelRowKind::SingleRow(excel_single_row) => write_row_strings(excel_single_row),
202                ExcelRowKind::SubRows(excel_single_rows) => {
203                    for row in excel_single_rows {
204                        write_row_strings(row);
205                    }
206                }
207            }
208        }
209
210        // aligned to the next 4 byte boundary
211        let boundary_pos = writer.stream_position().unwrap();
212        let remainder = boundary_pos.div_ceil(4) * 4;
213        for _ in 0..remainder - boundary_pos {
214            0u8.write_le(writer).unwrap();
215        }
216
217        let new_pos = writer.stream_position().unwrap();
218
219        // write row header
220        writer.seek(SeekFrom::Start(row_header_pos)).unwrap();
221
222        let row_header = DataSection {
223            size: (new_pos - old_pos) as u32,
224            row_count: 1, // TODO: hardcoded
225        };
226        row_header.write(writer).unwrap();
227
228        // restore pos
229        writer.seek(SeekFrom::Start(new_pos)).unwrap();
230    }
231
232    // now write the data offsets
233    writer.seek(SeekFrom::Start(data_offsets_pos)).unwrap();
234    data_offsets.write(writer).unwrap();
235
236    Ok(())
237}
238
239impl EXD {
240    fn read_data_raw<T: Read + Seek, Z: BinRead<Args<'static> = ()>>(cursor: &mut T) -> Option<Z> {
241        Z::read_options(cursor, Endian::Big, ()).ok()
242    }
243
244    pub(crate) fn read_column<T: Read + Seek>(
245        cursor: &mut T,
246        exh: &EXH,
247        row_offset: u32,
248        column: &ExcelColumnDefinition,
249    ) -> Option<ColumnData> {
250        let mut read_packed_bool = |shift: i32| -> bool {
251            let bit = 1 << shift;
252            let bool_data: u8 = Self::read_data_raw(cursor).unwrap_or(0);
253
254            (bool_data & bit) == bit
255        };
256
257        match column.data_type {
258            ColumnDataType::String => {
259                let string_offset: u32 = Self::read_data_raw(cursor).unwrap();
260
261                cursor
262                    .seek(SeekFrom::Start(
263                        (row_offset + exh.header.data_offset as u32 + string_offset).into(),
264                    ))
265                    .ok()?;
266
267                let mut string = String::new();
268
269                let mut byte: u8 = Self::read_data_raw(cursor).unwrap();
270                while byte != 0 {
271                    string.push(byte as char);
272                    byte = Self::read_data_raw(cursor).unwrap();
273                }
274
275                Some(ColumnData::String(string))
276            }
277            ColumnDataType::Bool => {
278                // FIXME: i believe Bool is int8?
279                let bool_data: i32 = Self::read_data_raw(cursor).unwrap();
280
281                Some(ColumnData::Bool(bool_data == 1))
282            }
283            ColumnDataType::Int8 => Some(ColumnData::Int8(Self::read_data_raw(cursor).unwrap())),
284            ColumnDataType::UInt8 => Some(ColumnData::UInt8(Self::read_data_raw(cursor).unwrap())),
285            ColumnDataType::Int16 => Some(ColumnData::Int16(Self::read_data_raw(cursor).unwrap())),
286            ColumnDataType::UInt16 => {
287                Some(ColumnData::UInt16(Self::read_data_raw(cursor).unwrap()))
288            }
289            ColumnDataType::Int32 => Some(ColumnData::Int32(Self::read_data_raw(cursor).unwrap())),
290            ColumnDataType::UInt32 => {
291                Some(ColumnData::UInt32(Self::read_data_raw(cursor).unwrap()))
292            }
293            ColumnDataType::Float32 => {
294                Some(ColumnData::Float32(Self::read_data_raw(cursor).unwrap()))
295            }
296            ColumnDataType::Int64 => Some(ColumnData::Int64(Self::read_data_raw(cursor).unwrap())),
297            ColumnDataType::UInt64 => {
298                Some(ColumnData::UInt64(Self::read_data_raw(cursor).unwrap()))
299            }
300            ColumnDataType::PackedBool0 => Some(ColumnData::Bool(read_packed_bool(0))),
301            ColumnDataType::PackedBool1 => Some(ColumnData::Bool(read_packed_bool(1))),
302            ColumnDataType::PackedBool2 => Some(ColumnData::Bool(read_packed_bool(2))),
303            ColumnDataType::PackedBool3 => Some(ColumnData::Bool(read_packed_bool(3))),
304            ColumnDataType::PackedBool4 => Some(ColumnData::Bool(read_packed_bool(4))),
305            ColumnDataType::PackedBool5 => Some(ColumnData::Bool(read_packed_bool(5))),
306            ColumnDataType::PackedBool6 => Some(ColumnData::Bool(read_packed_bool(6))),
307            ColumnDataType::PackedBool7 => Some(ColumnData::Bool(read_packed_bool(7))),
308        }
309    }
310
311    fn write_data_raw<T: Write + Seek, Z: BinWrite<Args<'static> = ()>>(cursor: &mut T, value: &Z) {
312        value.write_options(cursor, Endian::Big, ()).unwrap()
313    }
314
315    pub(crate) fn write_column<T: Write + Seek>(
316        cursor: &mut T,
317        column: &ColumnData,
318        column_definition: &ExcelColumnDefinition,
319        strings_len: &mut u32,
320    ) {
321        match column {
322            ColumnData::String(val) => {
323                let string_offset = *strings_len;
324                Self::write_data_raw(cursor, &string_offset);
325                *strings_len += val.len() as u32 + 1;
326            }
327            ColumnData::Bool(_) => match column_definition.data_type {
328                ColumnDataType::Bool => todo!(),
329                // packed bools are handled in write_rows
330                ColumnDataType::PackedBool0 => {}
331                ColumnDataType::PackedBool1 => {}
332                ColumnDataType::PackedBool2 => {}
333                ColumnDataType::PackedBool3 => {}
334                ColumnDataType::PackedBool4 => {}
335                ColumnDataType::PackedBool5 => {}
336                ColumnDataType::PackedBool6 => {}
337                ColumnDataType::PackedBool7 => {}
338                _ => panic!("This makes no sense!"),
339            },
340            ColumnData::Int8(val) => Self::write_data_raw(cursor, val),
341            ColumnData::UInt8(val) => Self::write_data_raw(cursor, val),
342            ColumnData::Int16(val) => Self::write_data_raw(cursor, val),
343            ColumnData::UInt16(val) => Self::write_data_raw(cursor, val),
344            ColumnData::Int32(val) => Self::write_data_raw(cursor, val),
345            ColumnData::UInt32(val) => Self::write_data_raw(cursor, val),
346            ColumnData::Float32(val) => Self::write_data_raw(cursor, val),
347            ColumnData::Int64(val) => Self::write_data_raw(cursor, val),
348            ColumnData::UInt64(val) => Self::write_data_raw(cursor, val),
349        }
350    }
351}