physis/
exd_file_operations.rs

1// SPDX-FileCopyrightText: 2025 Joshua Goins <josh@redstrate.com>
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4use std::{
5    collections::HashMap,
6    io::{Read, Seek, SeekFrom, Write},
7};
8
9use binrw::{BinRead, BinResult, BinWrite, Endian};
10
11use crate::{
12    exd::{
13        ColumnData, DataSection, DataSectionHeader, EXD, EXDHeader, ExcelDataOffset, ExcelRow,
14        ExcelRowKind, ExcelSingleRow, SubRowHeader,
15    },
16    exh::{ColumnDataType, EXH, ExcelColumnDefinition, SheetRowKind},
17};
18
19#[binrw::parser(reader)]
20pub fn read_data_sections(header: &EXDHeader) -> BinResult<Vec<DataSection>> {
21    let mut rows = Vec::new();
22
23    // we have to do this annoying thing because they specified it in bytes,
24    // not an actual count of data sections
25    let begin_pos = reader.stream_position().unwrap();
26    loop {
27        let current_pos = reader.stream_position().unwrap();
28        if current_pos - begin_pos >= header.data_section_size as u64 {
29            break;
30        }
31
32        let data_section = DataSection::read_be(reader).unwrap();
33        rows.push(data_section);
34    }
35
36    Ok(rows)
37}
38
39fn read_row<T: Read + Seek>(reader: &mut T, exh: &EXH, row_offset: u64) -> Option<ExcelSingleRow> {
40    let mut subrow = ExcelSingleRow {
41        columns: Vec::with_capacity(exh.column_definitions.len()),
42    };
43
44    for column in &exh.column_definitions {
45        reader
46            .seek(SeekFrom::Start((row_offset + column.offset as u64).into()))
47            .ok()?;
48
49        subrow
50            .columns
51            .push(EXD::read_column(reader, exh, row_offset, column).unwrap());
52    }
53
54    Some(subrow)
55}
56
57#[binrw::parser(reader)]
58pub fn parse_rows(exh: &EXH, data_offsets: &Vec<ExcelDataOffset>) -> BinResult<Vec<ExcelRow>> {
59    let mut rows = Vec::new();
60
61    for offset in data_offsets {
62        reader.seek(SeekFrom::Start(offset.offset.into()))?;
63
64        let row_header = DataSectionHeader::read(reader)?;
65
66        let data_offset = reader.stream_position().unwrap() as u64;
67
68        let new_row = if exh.header.row_kind == SheetRowKind::SubRows {
69            let mut rows = Vec::new();
70            for i in 0..row_header.row_count {
71                let subrow_offset = data_offset + i as u64 * (2 + exh.header.row_size as u64);
72                reader.seek(SeekFrom::Start(subrow_offset))?;
73
74                let subrow_header = SubRowHeader::read(reader)?;
75                rows.push((
76                    subrow_header.subrow_id,
77                    read_row(reader, &exh, subrow_offset + 2).unwrap(),
78                ));
79            }
80            ExcelRowKind::SubRows(rows)
81        } else {
82            ExcelRowKind::SingleRow(read_row(reader, &exh, data_offset).unwrap())
83        };
84        rows.push(ExcelRow {
85            row_id: offset.row_id,
86            kind: new_row,
87        });
88    }
89
90    Ok(rows)
91}
92
93fn write_row<T: Write + Seek>(writer: &mut T, exh: &EXH, row: &ExcelSingleRow) {
94    let mut column_definitions: Vec<(ExcelColumnDefinition, ColumnData)> = exh
95        .column_definitions
96        .clone()
97        .into_iter()
98        .zip(row.columns.clone().into_iter())
99        .collect::<Vec<_>>();
100
101    // we need to sort them by offset
102    column_definitions.sort_by(|(a, _), (b, _)| a.offset.cmp(&b.offset));
103
104    // handle packed bools
105    let mut packed_bools: HashMap<u16, u8> = HashMap::new();
106
107    let mut write_packed_bool = |definition: &ExcelColumnDefinition, shift: i32, boolean: &bool| {
108        if !packed_bools.contains_key(&definition.offset) {
109            packed_bools.insert(definition.offset, 0u8);
110        }
111
112        if *boolean {
113            let bit = 1 << shift;
114            *packed_bools.get_mut(&definition.offset).unwrap() |= bit;
115        }
116    };
117
118    // process packed bools before continuing, since we need to know what their final byte form is
119    for (definition, column) in &column_definitions {
120        match &column {
121            ColumnData::Bool(val) => match definition.data_type {
122                ColumnDataType::PackedBool0 => write_packed_bool(definition, 0, val),
123                ColumnDataType::PackedBool1 => write_packed_bool(definition, 1, val),
124                ColumnDataType::PackedBool2 => write_packed_bool(definition, 2, val),
125                ColumnDataType::PackedBool3 => write_packed_bool(definition, 3, val),
126                ColumnDataType::PackedBool4 => write_packed_bool(definition, 4, val),
127                ColumnDataType::PackedBool5 => write_packed_bool(definition, 5, val),
128                ColumnDataType::PackedBool6 => write_packed_bool(definition, 6, val),
129                ColumnDataType::PackedBool7 => write_packed_bool(definition, 7, val),
130                _ => {} // not relevant
131            },
132            _ => {} // not relevant
133        }
134    }
135
136    let mut strings_len = 0;
137    for (definition, column) in &column_definitions {
138        EXD::write_column(
139            writer,
140            column,
141            definition,
142            &mut strings_len,
143            &mut packed_bools,
144        );
145
146        // TODO: temporary workaround until i can figure out why it has 4 extra bytes in test_write's case
147        if definition.data_type == ColumnDataType::Int8 && column_definitions.len() == 1 {
148            0u32.write_le(writer).unwrap();
149        }
150
151        // TODO: temporary workaround until i can figure out why this *specific* packed boolean column in TerritoryType has three extra bytes at the end
152        if definition.offset == 60
153            && definition.data_type == ColumnDataType::PackedBool0
154            && column_definitions.len() == 44
155        {
156            [0u8; 3].write_le(writer).unwrap();
157        }
158    }
159}
160
161#[binrw::writer(writer)]
162pub fn write_rows(rows: &Vec<ExcelRow>, exh: &EXH) -> BinResult<()> {
163    // seek past the data offsets, which we will write later
164    let data_offsets_pos = writer.stream_position().unwrap();
165    writer
166        .seek(SeekFrom::Current(
167            (core::mem::size_of::<ExcelDataOffset>() * rows.len()) as i64,
168        ))
169        .unwrap();
170
171    let mut data_offsets = Vec::new();
172
173    for row in rows {
174        data_offsets.push(ExcelDataOffset {
175            row_id: row.row_id,
176            offset: writer.stream_position().unwrap() as u32,
177        });
178
179        // skip row header for now, because we don't know the size yet!
180        let row_header_pos = writer.stream_position().unwrap();
181
182        writer.seek(SeekFrom::Current(6)).unwrap(); // u32 + u16
183
184        let old_pos = writer.stream_position().unwrap();
185
186        // write column data
187        match &row.kind {
188            ExcelRowKind::SingleRow(excel_single_row) => write_row(writer, &exh, excel_single_row),
189            ExcelRowKind::SubRows(excel_single_rows) => {
190                for (id, row) in excel_single_rows {
191                    let subrow_header = SubRowHeader { subrow_id: *id };
192                    subrow_header.write_ne(writer)?;
193
194                    write_row(writer, &exh, row);
195                }
196            }
197        }
198
199        // write strings at the end of column data
200        {
201            let mut write_row_strings = |row: &ExcelSingleRow| {
202                for column in &row.columns {
203                    if let ColumnData::String(val) = column {
204                        let bytes = val.as_bytes();
205                        bytes.write(writer).unwrap();
206
207                        // nul terminator
208                        0u8.write_le(writer).unwrap();
209                    }
210                }
211            };
212
213            match &row.kind {
214                ExcelRowKind::SingleRow(excel_single_row) => write_row_strings(excel_single_row),
215                ExcelRowKind::SubRows(excel_single_rows) => {
216                    for (_, row) in excel_single_rows {
217                        write_row_strings(row);
218                    }
219                }
220            }
221        }
222
223        // aligned to the next 4 byte boundary
224        let boundary_pos = writer.stream_position().unwrap();
225        let remainder = boundary_pos.div_ceil(4) * 4;
226        for _ in 0..remainder - boundary_pos {
227            0u8.write_le(writer).unwrap();
228        }
229
230        let new_pos = writer.stream_position().unwrap();
231
232        // write row header
233        writer.seek(SeekFrom::Start(row_header_pos)).unwrap();
234
235        let row_header = DataSectionHeader {
236            size: (new_pos - old_pos) as u32,
237            row_count: 1, // TODO: hardcoded
238        };
239        row_header.write(writer).unwrap();
240
241        // restore pos
242        writer.seek(SeekFrom::Start(new_pos)).unwrap();
243    }
244
245    // now write the data offsets
246    writer.seek(SeekFrom::Start(data_offsets_pos)).unwrap();
247    data_offsets.write(writer).unwrap();
248
249    Ok(())
250}
251
252impl EXD {
253    fn read_data_raw<T: Read + Seek, Z: BinRead<Args<'static> = ()>>(cursor: &mut T) -> Option<Z> {
254        Z::read_options(cursor, Endian::Big, ()).ok()
255    }
256
257    pub(crate) fn read_column<T: Read + Seek>(
258        cursor: &mut T,
259        exh: &EXH,
260        row_offset: u64,
261        column: &ExcelColumnDefinition,
262    ) -> Option<ColumnData> {
263        let mut read_packed_bool = |shift: i32| -> bool {
264            let bit = 1 << shift;
265            let bool_data: u8 = Self::read_data_raw(cursor).unwrap_or(0);
266
267            (bool_data & bit) == bit
268        };
269
270        match column.data_type {
271            ColumnDataType::String => {
272                let string_offset: u32 = Self::read_data_raw(cursor).unwrap();
273
274                cursor
275                    .seek(SeekFrom::Start(
276                        (row_offset + exh.header.row_size as u64 + string_offset as u64).into(),
277                    ))
278                    .ok()?;
279
280                let mut string = String::new();
281
282                let mut byte: u8 = Self::read_data_raw(cursor).unwrap();
283                while byte != 0 {
284                    string.push(byte as char);
285                    byte = Self::read_data_raw(cursor).unwrap();
286                }
287
288                Some(ColumnData::String(string))
289            }
290            ColumnDataType::Bool => {
291                // FIXME: i believe Bool is int8?
292                let bool_data: i32 = Self::read_data_raw(cursor).unwrap();
293
294                Some(ColumnData::Bool(bool_data == 1))
295            }
296            ColumnDataType::Int8 => Some(ColumnData::Int8(Self::read_data_raw(cursor).unwrap())),
297            ColumnDataType::UInt8 => Some(ColumnData::UInt8(Self::read_data_raw(cursor).unwrap())),
298            ColumnDataType::Int16 => Some(ColumnData::Int16(Self::read_data_raw(cursor).unwrap())),
299            ColumnDataType::UInt16 => {
300                Some(ColumnData::UInt16(Self::read_data_raw(cursor).unwrap()))
301            }
302            ColumnDataType::Int32 => Some(ColumnData::Int32(Self::read_data_raw(cursor).unwrap())),
303            ColumnDataType::UInt32 => {
304                Some(ColumnData::UInt32(Self::read_data_raw(cursor).unwrap()))
305            }
306            ColumnDataType::Float32 => {
307                Some(ColumnData::Float32(Self::read_data_raw(cursor).unwrap()))
308            }
309            ColumnDataType::Int64 => Some(ColumnData::Int64(Self::read_data_raw(cursor).unwrap())),
310            ColumnDataType::UInt64 => {
311                Some(ColumnData::UInt64(Self::read_data_raw(cursor).unwrap()))
312            }
313            ColumnDataType::PackedBool0 => Some(ColumnData::Bool(read_packed_bool(0))),
314            ColumnDataType::PackedBool1 => Some(ColumnData::Bool(read_packed_bool(1))),
315            ColumnDataType::PackedBool2 => Some(ColumnData::Bool(read_packed_bool(2))),
316            ColumnDataType::PackedBool3 => Some(ColumnData::Bool(read_packed_bool(3))),
317            ColumnDataType::PackedBool4 => Some(ColumnData::Bool(read_packed_bool(4))),
318            ColumnDataType::PackedBool5 => Some(ColumnData::Bool(read_packed_bool(5))),
319            ColumnDataType::PackedBool6 => Some(ColumnData::Bool(read_packed_bool(6))),
320            ColumnDataType::PackedBool7 => Some(ColumnData::Bool(read_packed_bool(7))),
321        }
322    }
323
324    fn write_data_raw<T: Write + Seek, Z: BinWrite<Args<'static> = ()>>(cursor: &mut T, value: &Z) {
325        value.write_options(cursor, Endian::Big, ()).unwrap()
326    }
327
328    pub(crate) fn write_column<T: Write + Seek>(
329        cursor: &mut T,
330        column: &ColumnData,
331        column_definition: &ExcelColumnDefinition,
332        strings_len: &mut u32,
333        packed_bools: &mut HashMap<u16, u8>,
334    ) {
335        match column {
336            ColumnData::String(val) => {
337                let string_offset = *strings_len;
338                Self::write_data_raw(cursor, &string_offset);
339                *strings_len += val.len() as u32 + 1;
340            }
341            ColumnData::Bool(_) => match column_definition.data_type {
342                ColumnDataType::Bool => todo!(),
343                // packed bools are handled in write_rows
344                ColumnDataType::PackedBool0
345                | ColumnDataType::PackedBool1
346                | ColumnDataType::PackedBool2
347                | ColumnDataType::PackedBool3
348                | ColumnDataType::PackedBool4
349                | ColumnDataType::PackedBool5
350                | ColumnDataType::PackedBool6
351                | ColumnDataType::PackedBool7 => {
352                    if let Some(byte) = packed_bools.get(&column_definition.offset) {
353                        byte.write_le(cursor).unwrap();
354
355                        // then remove it so the next packed bool column doesn't write it again
356                        packed_bools.remove(&column_definition.offset);
357                    }
358                }
359                _ => panic!("This makes no sense!"),
360            },
361            ColumnData::Int8(val) => Self::write_data_raw(cursor, val),
362            ColumnData::UInt8(val) => Self::write_data_raw(cursor, val),
363            ColumnData::Int16(val) => Self::write_data_raw(cursor, val),
364            ColumnData::UInt16(val) => Self::write_data_raw(cursor, val),
365            ColumnData::Int32(val) => Self::write_data_raw(cursor, val),
366            ColumnData::UInt32(val) => Self::write_data_raw(cursor, val),
367            ColumnData::Float32(val) => Self::write_data_raw(cursor, val),
368            ColumnData::Int64(val) => Self::write_data_raw(cursor, val),
369            ColumnData::UInt64(val) => Self::write_data_raw(cursor, val),
370        }
371    }
372}