physis/
exd.rs

1// SPDX-FileCopyrightText: 2023 Joshua Goins <josh@redstrate.com>
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4use std::io::{BufWriter, Cursor, Read, Seek, SeekFrom, Write};
5
6use binrw::{BinRead, Endian};
7use binrw::{BinResult, BinWrite, binrw};
8
9use crate::common::Language;
10use crate::exh::{ColumnDataType, EXH, ExcelColumnDefinition, ExcelDataPagination};
11use crate::{ByteBuffer, ByteSpan};
12
/// Fixed-size header at the start of an EXD file.
///
/// Read and written big-endian; the data must begin with the `EXDF` magic.
#[binrw]
#[brw(magic = b"EXDF")]
#[brw(big)]
#[allow(dead_code)]
#[derive(Debug)]
struct EXDHeader {
    /// Version field as stored in the file; the code shown here never inspects it.
    version: u16,

    #[brw(pad_before = 2)] // empty
    /// Size in bytes of the ExcelDataOffset array (the entry count is derived by
    /// dividing this by the size of one entry).
    index_size: u32,
    #[brw(pad_after = 16)] // empty
    /// Total size of the string data?
    data_section_size: u32,
}
28
/// One entry of the row index that follows the header (big-endian).
#[binrw]
#[brw(big)]
#[derive(Debug)]
struct ExcelDataOffset {
    /// Identifier of the row this entry points at.
    row_id: u32,
    /// Position of the row's header, used as an absolute seek from the start of the stream.
    pub offset: u32,
}
36
/// Header stored immediately before each row's column data (big-endian).
#[binrw]
#[brw(big)]
#[allow(dead_code)]
struct ExcelDataRowHeader {
    /// Size field of the row; presumably the row's byte length — it is not consulted
    /// by the parsing code in this file.
    data_size: u32,
    /// Number of rows stored under this header; a value above 1 makes the parser
    /// treat the entry as a set of sub-rows.
    row_count: u16,
}
44
45#[binrw::parser(reader)]
46fn parse_rows(exh: &EXH, data_offsets: &Vec<ExcelDataOffset>) -> BinResult<Vec<ExcelRow>> {
47    let mut rows = Vec::new();
48
49    for offset in data_offsets {
50        reader.seek(SeekFrom::Start(offset.offset.into()))?;
51
52        let row_header = ExcelDataRowHeader::read(reader)?;
53
54        let data_offset = reader.stream_position().unwrap() as u32;
55
56        let mut read_row = |row_offset: u32| -> Option<ExcelSingleRow> {
57            let mut subrow = ExcelSingleRow {
58                columns: Vec::with_capacity(exh.column_definitions.len()),
59            };
60
61            for column in &exh.column_definitions {
62                reader
63                    .seek(SeekFrom::Start((row_offset + column.offset as u32).into()))
64                    .ok()?;
65
66                subrow
67                    .columns
68                    .push(EXD::read_column(reader, exh, row_offset, column).unwrap());
69            }
70
71            Some(subrow)
72        };
73
74        let new_row = if row_header.row_count > 1 {
75            let mut rows = Vec::new();
76            for i in 0..row_header.row_count {
77                let subrow_offset = data_offset + (i * exh.header.data_offset + 2 * (i + 1)) as u32;
78
79                rows.push(read_row(subrow_offset).unwrap());
80            }
81            ExcelRowKind::SubRows(rows)
82        } else {
83            ExcelRowKind::SingleRow(read_row(data_offset).unwrap())
84        };
85        rows.push(ExcelRow {
86            row_id: offset.row_id,
87            kind: new_row,
88        });
89    }
90
91    Ok(rows)
92}
93
94#[binrw::writer(writer)]
95fn write_rows(rows: &Vec<ExcelRow>, exh: &EXH) -> BinResult<()> {
96    // seek past the data offsets, which we will write later
97    let data_offsets_pos = writer.stream_position().unwrap();
98    writer
99        .seek(SeekFrom::Current(
100            (core::mem::size_of::<ExcelDataOffset>() * rows.len()) as i64,
101        ))
102        .unwrap();
103
104    let mut data_offsets = Vec::new();
105
106    for row in rows {
107        data_offsets.push(ExcelDataOffset {
108            row_id: row.row_id,
109            offset: writer.stream_position().unwrap() as u32,
110        });
111
112        let row_header = ExcelDataRowHeader {
113            data_size: 0,
114            row_count: 0,
115        };
116        row_header.write(writer).unwrap();
117
118        // write column data
119        {
120            let mut write_row = |row: &ExcelSingleRow| {
121                for (i, column) in row.columns.iter().enumerate() {
122                    EXD::write_column(writer, &column, &exh.column_definitions[i]);
123                }
124            };
125
126            match &row.kind {
127                ExcelRowKind::SingleRow(excel_single_row) => write_row(excel_single_row),
128                ExcelRowKind::SubRows(excel_single_rows) => {
129                    for row in excel_single_rows {
130                        write_row(row);
131                    }
132                }
133            }
134        }
135
136        // write strings at the end of column data
137        {
138            let mut write_row_strings = |row: &ExcelSingleRow| {
139                for column in &row.columns {
140                    match column {
141                        ColumnData::String(val) => {
142                            let bytes = val.as_bytes();
143                            bytes.write(writer).unwrap();
144                        }
145                        _ => {}
146                    }
147                }
148            };
149
150            match &row.kind {
151                ExcelRowKind::SingleRow(excel_single_row) => write_row_strings(excel_single_row),
152                ExcelRowKind::SubRows(excel_single_rows) => {
153                    for row in excel_single_rows {
154                        write_row_strings(row);
155                    }
156                }
157            }
158        }
159
160        // There's an empty byte between each row... for some reason
161        0u8.write_le(writer).unwrap();
162    }
163
164    // now write the data offsets
165    writer.seek(SeekFrom::Start(data_offsets_pos)).unwrap();
166    data_offsets.write(writer).unwrap();
167
168    Ok(())
169}
170
/// An EXD file: header, row index and the decoded rows.
///
/// Reading and writing both need the matching `EXH`, passed in as the `exh` import,
/// because the column layout is taken from it.
#[binrw]
#[brw(big)]
#[allow(dead_code)]
#[derive(Debug)]
#[brw(import(exh: &EXH))]
pub struct EXD {
    /// File header as read from the stream.
    header: EXDHeader,

    /// Row index read from the file. The entry count is `index_size` divided by the
    /// size of one entry. It is skipped on write because `write_rows` emits a fresh
    /// offset table itself.
    #[br(count = header.index_size / core::mem::size_of::<ExcelDataOffset>() as u32)]
    #[bw(ignore)]
    data_offsets: Vec<ExcelDataOffset>,

    /// All rows of this file, one per index entry.
    #[br(parse_with = parse_rows, args(&exh, &data_offsets))]
    #[bw(write_with = write_rows, args(&exh))]
    pub rows: Vec<ExcelRow>,
}
187
/// A single decoded cell value of an Excel row.
#[derive(Debug, Clone)]
pub enum ColumnData {
    String(String),
    Bool(bool),
    Int8(i8),
    UInt8(u8),
    Int16(i16),
    UInt16(u16),
    Int32(i32),
    UInt32(u32),
    Float32(f32),
    Int64(i64),
    UInt64(u64),
}

impl ColumnData {
    /// Borrows the inner `String` if this is a `String` column, otherwise `None`.
    pub fn into_string(&self) -> Option<&String> {
        match self {
            Self::String(inner) => Some(inner),
            _ => None,
        }
    }

    /// Borrows the inner `bool` if this is a `Bool` column, otherwise `None`.
    pub fn into_bool(&self) -> Option<&bool> {
        match self {
            Self::Bool(inner) => Some(inner),
            _ => None,
        }
    }

    /// Borrows the inner `i8` if this is an `Int8` column, otherwise `None`.
    pub fn into_i8(&self) -> Option<&i8> {
        match self {
            Self::Int8(inner) => Some(inner),
            _ => None,
        }
    }

    /// Borrows the inner `u8` if this is a `UInt8` column, otherwise `None`.
    pub fn into_u8(&self) -> Option<&u8> {
        match self {
            Self::UInt8(inner) => Some(inner),
            _ => None,
        }
    }

    /// Borrows the inner `i16` if this is an `Int16` column, otherwise `None`.
    pub fn into_i16(&self) -> Option<&i16> {
        match self {
            Self::Int16(inner) => Some(inner),
            _ => None,
        }
    }

    /// Borrows the inner `u16` if this is a `UInt16` column, otherwise `None`.
    pub fn into_u16(&self) -> Option<&u16> {
        match self {
            Self::UInt16(inner) => Some(inner),
            _ => None,
        }
    }

    /// Borrows the inner `i32` if this is an `Int32` column, otherwise `None`.
    pub fn into_i32(&self) -> Option<&i32> {
        match self {
            Self::Int32(inner) => Some(inner),
            _ => None,
        }
    }

    /// Borrows the inner `u32` if this is a `UInt32` column, otherwise `None`.
    pub fn into_u32(&self) -> Option<&u32> {
        match self {
            Self::UInt32(inner) => Some(inner),
            _ => None,
        }
    }

    /// Borrows the inner `f32` if this is a `Float32` column, otherwise `None`.
    pub fn into_f32(&self) -> Option<&f32> {
        match self {
            Self::Float32(inner) => Some(inner),
            _ => None,
        }
    }

    /// Borrows the inner `i64` if this is an `Int64` column, otherwise `None`.
    pub fn into_i64(&self) -> Option<&i64> {
        match self {
            Self::Int64(inner) => Some(inner),
            _ => None,
        }
    }

    /// Borrows the inner `u64` if this is a `UInt64` column, otherwise `None`.
    pub fn into_u64(&self) -> Option<&u64> {
        match self {
            Self::UInt64(inner) => Some(inner),
            _ => None,
        }
    }
}
292
/// The decoded cells of one row (or one sub-row).
#[derive(Debug, Clone)]
pub struct ExcelSingleRow {
    /// Cell values, stored in the same order as the EXH column definitions.
    pub columns: Vec<ColumnData>,
}
297
/// The payload of an index entry: either one row or a list of sub-rows.
#[derive(Debug, Clone)]
pub enum ExcelRowKind {
    /// Produced when the row header reports a row count of at most 1.
    SingleRow(ExcelSingleRow),
    /// Produced when the row header reports a row count above 1.
    SubRows(Vec<ExcelSingleRow>),
}
303
/// A row together with the identifier it is indexed under.
#[derive(Debug)]
pub struct ExcelRow {
    /// Row identifier taken from the file's offset table.
    pub row_id: u32,
    /// The row's data, either a single row or its sub-rows.
    pub kind: ExcelRowKind,
}
309
310impl EXD {
311    pub fn from_existing(exh: &EXH, buffer: ByteSpan) -> Option<EXD> {
312        EXD::read_args(&mut Cursor::new(&buffer), (exh,)).ok()
313    }
314
315    pub fn get_row(&self, row_id: u32) -> Option<ExcelRowKind> {
316        for row in &self.rows {
317            if row.row_id == row_id {
318                return Some(row.kind.clone());
319            }
320        }
321
322        return None;
323    }
324
325    fn read_data_raw<T: Read + Seek, Z: BinRead<Args<'static> = ()>>(cursor: &mut T) -> Option<Z> {
326        Z::read_options(cursor, Endian::Big, ()).ok()
327    }
328
329    fn read_column<T: Read + Seek>(
330        cursor: &mut T,
331        exh: &EXH,
332        row_offset: u32,
333        column: &ExcelColumnDefinition,
334    ) -> Option<ColumnData> {
335        let mut read_packed_bool = |shift: i32| -> bool {
336            let bit = 1 << shift;
337            let bool_data: i32 = Self::read_data_raw(cursor).unwrap_or(0);
338
339            (bool_data & bit) == bit
340        };
341
342        match column.data_type {
343            ColumnDataType::String => {
344                let string_offset: u32 = Self::read_data_raw(cursor).unwrap();
345
346                cursor
347                    .seek(SeekFrom::Start(
348                        (row_offset + exh.header.data_offset as u32 + string_offset).into(),
349                    ))
350                    .ok()?;
351
352                let mut string = String::new();
353
354                let mut byte: u8 = Self::read_data_raw(cursor).unwrap();
355                while byte != 0 {
356                    string.push(byte as char);
357                    byte = Self::read_data_raw(cursor).unwrap();
358                }
359
360                Some(ColumnData::String(string))
361            }
362            ColumnDataType::Bool => {
363                // FIXME: i believe Bool is int8?
364                let bool_data: i32 = Self::read_data_raw(cursor).unwrap();
365
366                Some(ColumnData::Bool(bool_data == 1))
367            }
368            ColumnDataType::Int8 => Some(ColumnData::Int8(Self::read_data_raw(cursor).unwrap())),
369            ColumnDataType::UInt8 => Some(ColumnData::UInt8(Self::read_data_raw(cursor).unwrap())),
370            ColumnDataType::Int16 => Some(ColumnData::Int16(Self::read_data_raw(cursor).unwrap())),
371            ColumnDataType::UInt16 => {
372                Some(ColumnData::UInt16(Self::read_data_raw(cursor).unwrap()))
373            }
374            ColumnDataType::Int32 => Some(ColumnData::Int32(Self::read_data_raw(cursor).unwrap())),
375            ColumnDataType::UInt32 => {
376                Some(ColumnData::UInt32(Self::read_data_raw(cursor).unwrap()))
377            }
378            ColumnDataType::Float32 => {
379                Some(ColumnData::Float32(Self::read_data_raw(cursor).unwrap()))
380            }
381            ColumnDataType::Int64 => Some(ColumnData::Int64(Self::read_data_raw(cursor).unwrap())),
382            ColumnDataType::UInt64 => {
383                Some(ColumnData::UInt64(Self::read_data_raw(cursor).unwrap()))
384            }
385            ColumnDataType::PackedBool0 => Some(ColumnData::Bool(read_packed_bool(0))),
386            ColumnDataType::PackedBool1 => Some(ColumnData::Bool(read_packed_bool(1))),
387            ColumnDataType::PackedBool2 => Some(ColumnData::Bool(read_packed_bool(2))),
388            ColumnDataType::PackedBool3 => Some(ColumnData::Bool(read_packed_bool(3))),
389            ColumnDataType::PackedBool4 => Some(ColumnData::Bool(read_packed_bool(4))),
390            ColumnDataType::PackedBool5 => Some(ColumnData::Bool(read_packed_bool(5))),
391            ColumnDataType::PackedBool6 => Some(ColumnData::Bool(read_packed_bool(6))),
392            ColumnDataType::PackedBool7 => Some(ColumnData::Bool(read_packed_bool(7))),
393        }
394    }
395
396    fn write_data_raw<T: Write + Seek, Z: BinWrite<Args<'static> = ()>>(cursor: &mut T, value: &Z) {
397        value.write_options(cursor, Endian::Big, ()).unwrap()
398    }
399
400    fn write_column<T: Write + Seek>(
401        cursor: &mut T,
402        column: &ColumnData,
403        column_definition: &ExcelColumnDefinition,
404    ) {
405        let write_packed_bool = |cursor: &mut T, shift: i32, boolean: &bool| {
406            let val = 0i32; // TODO
407            Self::write_data_raw(cursor, &val);
408        };
409
410        match column {
411            ColumnData::String(_) => {
412                let string_offset = 0u32; // TODO, but 0 is fine for single string column data
413                Self::write_data_raw(cursor, &string_offset);
414            }
415            ColumnData::Bool(val) => match column_definition.data_type {
416                ColumnDataType::Bool => todo!(),
417                ColumnDataType::PackedBool0 => write_packed_bool(cursor, 0, val),
418                ColumnDataType::PackedBool1 => write_packed_bool(cursor, 1, val),
419                ColumnDataType::PackedBool2 => write_packed_bool(cursor, 2, val),
420                ColumnDataType::PackedBool3 => write_packed_bool(cursor, 3, val),
421                ColumnDataType::PackedBool4 => write_packed_bool(cursor, 4, val),
422                ColumnDataType::PackedBool5 => write_packed_bool(cursor, 5, val),
423                ColumnDataType::PackedBool6 => write_packed_bool(cursor, 6, val),
424                ColumnDataType::PackedBool7 => write_packed_bool(cursor, 7, val),
425                _ => panic!("This makes no sense!"),
426            },
427            ColumnData::Int8(val) => Self::write_data_raw(cursor, val),
428            ColumnData::UInt8(val) => Self::write_data_raw(cursor, val),
429            ColumnData::Int16(val) => Self::write_data_raw(cursor, val),
430            ColumnData::UInt16(val) => Self::write_data_raw(cursor, val),
431            ColumnData::Int32(val) => Self::write_data_raw(cursor, val),
432            ColumnData::UInt32(val) => Self::write_data_raw(cursor, val),
433            ColumnData::Float32(val) => Self::write_data_raw(cursor, val),
434            ColumnData::Int64(val) => Self::write_data_raw(cursor, val),
435            ColumnData::UInt64(val) => Self::write_data_raw(cursor, val),
436        }
437    }
438
439    pub fn calculate_filename(
440        name: &str,
441        language: Language,
442        page: &ExcelDataPagination,
443    ) -> String {
444        use crate::common::get_language_code;
445
446        match language {
447            Language::None => {
448                format!("{name}_{}.exd", page.start_id)
449            }
450            lang => {
451                format!("{name}_{}_{}.exd", page.start_id, get_language_code(&lang))
452            }
453        }
454    }
455
456    pub fn write_to_buffer(&self, exh: &EXH) -> Option<ByteBuffer> {
457        let mut buffer = ByteBuffer::new();
458
459        {
460            let cursor = Cursor::new(&mut buffer);
461            let mut writer = BufWriter::new(cursor);
462
463            self.write_args(&mut writer, (exh,)).unwrap();
464        }
465
466        Some(buffer)
467    }
468}
469
#[cfg(test)]
mod tests {
    use crate::exh::EXHHeader;
    use std::fs::read;
    use std::path::PathBuf;

    use super::*;

    /// Parsing a file of random bytes must fail gracefully.
    #[test]
    fn test_invalid() {
        let fixture = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
            .join("resources/tests")
            .join("random");

        // An EXH with no columns, pages or languages is enough for this check.
        let header = EXHHeader {
            version: 0,
            data_offset: 0,
            column_count: 0,
            page_count: 0,
            language_count: 0,
            row_count: 0,
        };
        let exh = EXH {
            header,
            column_definitions: Vec::new(),
            pages: Vec::new(),
            languages: Vec::new(),
        };

        let bytes = read(fixture).unwrap();

        // Feeding it invalid data should not panic
        EXD::from_existing(&exh, &bytes);
    }
}