physis/
dic.rs

1// SPDX-FileCopyrightText: 2024 Joshua Goins <josh@redstrate.com>
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4use std::collections::HashMap;
5use std::io::{Cursor, Seek, SeekFrom};
6
7use crate::ByteSpan;
8use binrw::binrw;
9use binrw::{BinRead, BinReaderExt};
10
11// Based off of https://github.com/Lotlab/ffxiv-vulgar-words-reader/
12// Credit goes to Jim Kirisame for documenting this format
13// TODO: double check I'm reading everything correctly
14
15#[binrw]
16#[derive(Debug)]
17#[brw(little)]
18pub struct EntryItem {
19    flag: u32,
20    sibling: u32,
21    child: u32,
22    offset: u32,
23}
24
25#[binrw]
26#[derive(Debug)]
27#[brw(little)]
28struct DictionaryHeader {
29    #[br(seek_before = SeekFrom::Start(0x8124))]
30    #[br(count = 256)]
31    chara_replace1: Vec<u16>,
32
33    #[br(count = 256)]
34    chara_replace2: Vec<u16>,
35
36    #[br(count = 256)]
37    chara_replace3: Vec<u16>,
38
39    #[br(count = 5)]
40    block_offsets: Vec<u32>,
41
42    #[br(count = 5)]
43    block_lengths: Vec<u32>,
44
45    #[br(pad_before = 4)]
46    #[br(count = 256)]
47    chara_block: Vec<u32>,
48
49    #[br(ignore)]
50    begin_node: Vec<u16>,
51
52    #[br(ignore)]
53    inner_node: Vec<u16>,
54
55    #[br(ignore)]
56    chara: Vec<u16>,
57
58    #[br(ignore)]
59    word: Vec<u16>,
60
61    #[br(ignore)]
62    entries: Vec<EntryItem>,
63}
64
65pub struct Dictionary {
66    header: DictionaryHeader,
67    pub words: Vec<String>,
68}
69
70impl Dictionary {
71    /// Parses an existing dictionary file.
72    pub fn from_existing(buffer: ByteSpan) -> Option<Dictionary> {
73        let mut cursor = Cursor::new(buffer);
74        let mut dict = DictionaryHeader::read(&mut cursor).unwrap();
75
76        let map_start = 0x8750u32;
77        let map_size = 0x200u32;
78
79        // fix up offsets
80        for offset in &mut dict.block_offsets {
81            *offset = *offset + map_start + map_size;
82        }
83
84        for i in 0..dict.block_lengths[0] / 2 {
85            let offset = dict.block_offsets[0] + i * 2;
86            cursor.seek(SeekFrom::Start(offset as u64)).ok()?;
87            dict.begin_node.push(cursor.read_le::<u16>().ok()?);
88        }
89
90        for i in 0..dict.block_lengths[1] / 2 {
91            let offset = dict.block_offsets[1] + i * 2;
92            cursor.seek(SeekFrom::Start(offset as u64)).ok()?;
93            dict.inner_node.push(cursor.read_le::<u16>().ok()?);
94        }
95
96        for i in 0..dict.block_lengths[2] / 2 {
97            let offset = dict.block_offsets[2] + i * 2;
98            cursor.seek(SeekFrom::Start(offset as u64)).ok()?;
99            dict.chara.push(cursor.read_le::<u16>().ok()?);
100        }
101
102        for i in 0..dict.block_lengths[3] / 2 {
103            let offset = dict.block_offsets[3] + i * 2;
104            cursor.seek(SeekFrom::Start(offset as u64)).ok()?;
105            dict.word.push(cursor.read_le::<u16>().ok()?);
106        }
107
108        for i in 0..dict.block_lengths[4] / 16 {
109            let offset = dict.block_offsets[4] + i * 16;
110            cursor.seek(SeekFrom::Start(offset as u64)).ok()?;
111            dict.entries.push(cursor.read_le::<EntryItem>().ok()?);
112        }
113
114        let mut dict = Dictionary {
115            header: dict,
116            words: Vec::new(),
117        };
118
119        // TODO: lol
120        dict.words = dict.list_words()?;
121
122        Some(dict)
123    }
124
125    fn list_words(&self) -> Option<Vec<String>> {
126        let mut result = Vec::new();
127        let lut = self.generate_index_rune_lookup_table();
128        for (id, v) in self.header.begin_node.iter().enumerate() {
129            if *v == 0 {
130                continue;
131            }
132
133            let chara = Dictionary::index_to_rune(&lut, id as u32);
134            self.dump_dict_node(&mut result, *v as i32, String::from(chara as u8 as char))
135        }
136
137        Some(result)
138    }
139
140    fn generate_index_rune_lookup_table(&self) -> HashMap<u16, u16> {
141        let mut map = HashMap::new();
142        for i in 0..self.header.chara_block.len() {
143            map.insert(self.header.chara_block[i] as u16, i as u16);
144        }
145
146        map
147    }
148
149    fn index_to_rune(lookup_table: &HashMap<u16, u16>, index: u32) -> i32 {
150        let higher = index >> 8;
151        let lower = index & 0xFF;
152
153        if higher == 0 {
154            return 0;
155        }
156
157        if let Some(new_val) = lookup_table.get(&(higher as u16)) {
158            (((*new_val as u32) << 8) + lower) as i32
159        } else {
160            0
161        }
162    }
163
164    fn dump_dict_node(&self, vec: &mut Vec<String>, entry_id: i32, prev: String) {
165        let node = &self.header.entries[entry_id as usize];
166        for i in 0..node.sibling {
167            let Some(current) = self.get_string(entry_id, i as i32) else {
168                return;
169            };
170
171            if node.child == 0 {
172                vec.push(prev.clone() + &current);
173                continue;
174            }
175
176            let value = self.header.inner_node[(node.child + i) as usize];
177            if value == 0 {
178                vec.push(prev.clone() + &current);
179                continue;
180            }
181
182            self.dump_dict_node(vec, value as i32, prev.clone() + &current);
183        }
184    }
185
186    fn get_string(&self, entry_id: i32, sibling_id: i32) -> Option<String> {
187        if let Some(characters) = self.get_string_characters(entry_id, sibling_id) {
188            return String::from_utf16(&characters).ok();
189        }
190
191        None
192    }
193
194    fn get_string_characters(&self, entry_id: i32, sibling_id: i32) -> Option<Vec<u16>> {
195        if entry_id as usize >= self.header.entries.len() {
196            return None;
197        }
198
199        let entry = self.header.entries.get(entry_id as usize)?;
200
201        if entry.flag == 0 {
202            let pos = (entry.offset / 2) as i32 + sibling_id;
203            if pos as usize > self.header.chara.len() {
204                return None;
205            }
206
207            if self.header.chara[pos as usize] == 0 {
208                return None;
209            }
210
211            return Some(vec![self.header.chara[pos as usize]]);
212        }
213
214        let begin = entry.offset / 2;
215        let mut end = begin + 1;
216
217        while (end as usize) < self.header.word.len() && self.header.word[end as usize] != 0 {
218            end += 1;
219        }
220
221        Some(self.header.word[begin as usize..end as usize].to_vec())
222    }
223}
224
#[cfg(test)]
mod tests {
    use std::fs::read;
    use std::path::PathBuf;

    use super::*;

    /// Runs the parser over the `resources/tests/random` fixture.
    #[test]
    fn test_invalid() {
        let fixture = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
            .join("resources/tests")
            .join("random");
        let bytes = read(fixture).unwrap();

        // Feeding it invalid data should not panic
        Dictionary::from_existing(&bytes);
    }
}
241}