first commit

Commit acd31cdf9d by dasha, 2026-01-05 12:33:47 +03:00
73 changed files with 650543 additions and 0 deletions

helpers/blocks.py (new file, 0 lines)

helpers/index.py (new file, 143 lines)

@@ -0,0 +1,143 @@
# extract and parse asset map
import io
import os
import json
import shutil
import subprocess
from datetime import datetime

from rich import print
from rich.console import Console
from rich.markdown import Markdown
from rich.progress import Progress, SpinnerColumn, TimeElapsedColumn
from kaitaistruct import KaitaiStream

import helpers.utils as utils
from parsers.ai import AssetIndex


def extract_asset_index():
    game_target = utils.get_config("gameTarget")
    if not game_target:
        print("[red3]No game target!\nDefine it with --set_target first[/]")
        return
    console = Console(record=True)
    print(Markdown(f"## Generating asset index file (target: v{game_target})"))
    with Progress(
        SpinnerColumn(),
        *Progress.get_default_columns(),
        TimeElapsedColumn(),
        console=console,
        transient=False
    ) as progress:
        task = progress.add_task("[cyan]Building asset index...", total=None)
        # extract 0000006f from blk
        blk_file = utils.get_path("blk/31049740.blk")  # TODO: use config for block id
        progress.log("Extracting [b]0000006f[/]...")
        subprocess.call(
            [utils.get_config("studioPath"), blk_file, utils.get_path("bin/"),
             "--game", "GI", "--types", "MiHoYoBinData"],
            stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT
        )
        shutil.move(utils.get_path("bin/MiHoYoBinData/0000006f"), utils.get_path("bin/0000006f"))
        shutil.rmtree(utils.get_path("bin/MiHoYoBinData"))
        # parse 0000006f
        progress.log("Parsing...")
        with open(utils.get_path("bin/0000006f"), "rb") as f:
            ai_ref_data = f.read()
        stream = KaitaiStream(io.BytesIO(ai_ref_data))  # in-memory copy allows file deletion later
        asset_index_ref = AssetIndex(stream)
        with open(utils.get_path("configs/map_helper.json"), "r") as f:
            asset_map = json.load(f)
        asset_index = {}
        # types
        progress.log(Markdown("Processing __types__..."))
        asset_index["Types"] = {e.name.data: e.mapped_to.data for e in asset_index_ref.type_mapping}
        # sub assets
        progress.log(Markdown("Processing __sub assets__..."))
        sub = {}
        pos = 0
        for e in asset_index_ref.assets:
            try:
                # the two hash halves combine into the single key used by map_helper.json
                name = asset_map[str((e.path_hash_last << 8) | e.path_hash_pre)]
                pos += 1  # count resolved names for the coverage stat
            except KeyError:
                name = ""
            if e.sub_asset_id not in sub:
                sub[e.sub_asset_id] = []
            sub[e.sub_asset_id].append({
                "Name": name,
                "PathHashPre": e.path_hash_pre,
                "PathHashLast": e.path_hash_last
            })
        coverage = pos / len(asset_index_ref.assets)
        asset_index["SubAssets"] = sub
        # dependencies
        progress.log(Markdown("Processing __dependencies__..."))
        asset_index["Dependencies"] = {e.asset_id: e.dependencies_list for e in asset_index_ref.dependencies}
        # preload blocks
        progress.log(Markdown("Processing __preload blocks__..."))
        asset_index["PreloadBlocks"] = asset_index_ref.preload_blocks
        # preload shader blocks
        progress.log(Markdown("Processing __preload shader blocks__..."))
        asset_index["PreloadShaderBlocks"] = asset_index_ref.preload_shader_blocks
        # assets
        progress.log(Markdown("Processing __assets__..."))
        # map each block id to its group id (used as the Language field below)
        blocksGroups = {j.block_list: i.group_id for i in asset_index_ref.block_groups for j in i.block_list}
        asset_index["Assets"] = {j.path_hash_pre: {
            "Language": blocksGroups.get(i.block_id, 0),
            "Id": i.block_id,
            "Offset": j.offset
        } for i in asset_index_ref.block_infos for j in i.asset_offsets}
        # write output
        data = json.dumps(asset_index, ensure_ascii=False, sort_keys=True)
        filesize = len(data)
        with open(utils.get_path(f"configs/indexes/index.{game_target}.json"), "w", encoding="utf-8") as f:
            f.write(data)
        metadata = {
            "coverage": coverage,
            "size": filesize,
            "time": datetime.now().timestamp()
        }
        with open(utils.get_path(f"configs/indexes/index.{game_target}.meta.json"), "w") as f:
            f.write(json.dumps(metadata))
        data = None
        asset_index_ref = None
        os.remove(utils.get_path("bin/0000006f"))
        progress.stop_task(task)
        progress.log(Markdown("## Done!"))
        progress.log(Markdown(f"# Coverage: `{coverage:.2%}` | Map size: `{round(filesize / 1048576, 2)} MiB`"))
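
As a reading aid, a minimal sketch of how the index file written above might be loaded back and queried. The key shapes (string keys after the JSON round-trip, the SubAssets entry layout) follow from the writer code; the lookup helper itself is hypothetical, not part of this commit.

# sketch: load a generated index and find sub assets by name (hypothetical helper)
import json

def load_index(version):
    with open(f"configs/indexes/index.{version}.json", "r", encoding="utf-8") as f:
        return json.load(f)

def find_sub_assets(index, name):
    # SubAssets maps sub_asset_id -> [{"Name", "PathHashPre", "PathHashLast"}, ...];
    # keys come back as strings after the JSON round-trip
    return [(sub_id, entry)
            for sub_id, entries in index["SubAssets"].items()
            for entry in entries
            if entry["Name"] == name]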

helpers/status.py (new file, 85 lines)

@@ -0,0 +1,85 @@
# status stuff from project
import os
from datetime import datetime

from rich import print

import helpers.utils as utils


def is_key_defined(name, default):
    key = utils.get_config(name)
    if key == default:
        return [False]
    return [True, key]


def is_file_present(path):
    return os.path.isfile(utils.get_path(path))


def get_status():
    # [is the block present on disk, "folder/file.blk" label] for a given block id
    text_util = lambda file, folder: [is_file_present(f"blk/{file}.blk"), f"{str(folder).zfill(2)}/{file}.blk"]
    # TODO: use config for blocks id
    status = {
        "Blocks": {
            "BinOutput": text_util(24230448, 0),
            "ExcelBinOutput": text_util(25539185, 0),
            "AssetIndex": text_util(31049740, 0),
            "LuaScripts": text_util(35323818, 0)
        },
        "Textmaps": {
            "CHS": text_util(26692920, 1),
            "CHT": text_util(27251172, 2),
            "DE": text_util(25181351, 3),
            "EN": text_util(25776943, 4),
            "ES": text_util(20618174, 5),
            "FR": text_util(25555476, 6),
            "ID": text_util(30460104, 7),
            "JP": text_util(32244380, 8),
            "KR": text_util(22299426, 9),
            "PT": text_util(23331191, 10),
            "RU": text_util(21030516, 11),
            "TH": text_util(32056053, 12),
            "VI": text_util(34382464, 13),
            "IT": text_util(27270675, 14),
            "TR": text_util(21419401, 15)
        }
    }
    gameTarget = is_key_defined("gameTarget", None)
    index = False
    if gameTarget[0]:
        indexCheck = is_file_present(f"configs/indexes/index.{gameTarget[1]}.json")
        metaCheck = is_file_present(f"configs/indexes/index.{gameTarget[1]}.meta.json")
        index = indexCheck and metaCheck
    if index:
        meta = utils.get_index_meta(gameTarget[1])
        timeText = datetime.fromtimestamp(meta["time"]).strftime("%m/%d/%Y")
        indexText = f'generated on {timeText} | {round(meta["size"] / 1048576, 2)} MiB | {meta["coverage"]:.2%} coverage'
    status["Configs"] = {
        "GameTarget": [gameTarget[0], f"target: v{gameTarget[1]}" if gameTarget[0] else "not defined yet"],
        "Index": [index, indexText if index else "not generated yet"]
    }
    return status


def print_status():
    status = get_status()
    for cat, values in status.items():
        print(f"[bold underline]{cat}[/bold underline]")
        for elem, state in values.items():
            symbol = "[green]\u2713[/green]" if state[0] else "[red]\u2718[/red]"
            additional = f"[dim white] ({state[1]})[/dim white]" if state[1] != "" else ""
            print(f"{symbol} {elem}{additional}")
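
The --set_target flag mentioned in the error messages is not part of this commit; as a rough sketch, the wiring could look like the following (the argparse flag names here are assumptions):

# hypothetical CLI wiring; flag names are assumptions based on the messages above
import argparse
import helpers.utils as utils
import helpers.status as status

parser = argparse.ArgumentParser()
parser.add_argument("--set_target", help="game version to target")
parser.add_argument("--status", action="store_true", help="print project status")
args = parser.parse_args()
if args.set_target:
    utils.set_config("gameTarget", args.set_target)
if args.status:
    status.print_status()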

helpers/textmap.py (new file, 145 lines)

@@ -0,0 +1,145 @@
# parse text maps
import re
import os
import glob
import json
import shutil
import subprocess
from io import BytesIO

from rich import print
from rich.console import Console
from rich.markdown import Markdown
from rich.progress import Progress, SpinnerColumn, TimeElapsedColumn
from kaitaistruct import KaitaiStream

import helpers.utils as utils
from parsers.text import Textmap

# TODO: use config
map_files = {
    "CHS": 26692920,
    "CHT": 27251172,
    "DE": 25181351,
    "EN": 25776943,
    "ES": 20618174,
    "FR": 25555476,
    "ID": 30460104,
    "JP": 32244380,
    "KR": 22299426,
    "PT": 23331191,
    "RU": 21030516,
    "TH": 32056053,
    "VI": 34382464,
    "IT": 27270675,
    "TR": 21419401
}

# matches everything outside printable ASCII
ASCII = re.compile(r"[^\u0020-\u007E]")


def extract_text_maps(requested):
    game_target = utils.get_config("gameTarget")
    if not game_target:
        print("[red3]No game target!\nDefine it with --set_target first[/]")
        return
    console = Console(record=True)
    for lang in requested:
        blk_file = utils.get_path(f"blk/{map_files[lang]}.blk")
        # find xor key
        # the xor key is annoying to find, so it is defined manually for now; needs updating every version
        xor_key = 99
        print(f"XOR key: {xor_key}")
        # extract mhy data
        with console.status("[cyan]Extracting text data...", spinner="dots"):
            # xor_key holds hex digits, so 99 is passed to the studio CLI as 0x99 (153)
            process = subprocess.Popen(
                [utils.get_config("studioPath"), blk_file, utils.get_path("bin/"),
                 "--game", "GI", "--types", "MiHoYoBinData",
                 "--key", str(int(str(xor_key), 16))],
                stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT
            )
            process.wait()
        # xor each file: doesn't work yet, the encoding and headers are missing
        files = glob.glob(utils.get_path("bin/MiHoYoBinData") + "/*")
        print(f"[cyan] [b]{len(files)}[/] assets extracted!")
        # pos = 0
        # for file in files:
        #     xor = int(str(xor_key), 16)
        #     pos += 1
        #     print(f"[{pos}/{len(files)}] Xoring {os.path.basename(file)}", end="\r")
        #     with open(file, "r+b") as f:
        #         data = f.read()
        #         if len(data) == 0:
        #             continue
        #         data = ASCII.sub("", "".join([chr(b ^ xor) for b in data]))
        #         f.seek(0)
        #         f.write(data.encode("utf-8"))
        #         f.truncate()
        # parse each file
        output = {}
        pos = 0
        with Progress(
            SpinnerColumn(),
            *Progress.get_default_columns(),
            TimeElapsedColumn(),
            console=console,
            transient=False
        ) as progress:
            task = progress.add_task(f"[cyan]Building ({lang}) text map...", total=len(files))
            for file in files:
                pos += 1
                progress.update(task, advance=1)
                # could store the xor result so we don't have to reopen the file?
                with open(file, "rb") as f:
                    data = f.read()
                if len(data) == 0:
                    continue
                try:
                    stream = KaitaiStream(BytesIO(data))
                    obj = Textmap(stream)
                except UnicodeDecodeError:
                    print("Failed to parse, is the xor key ok?")
                    break
                for block in obj.textmap:
                    if block.string.data != "":
                        output[str(block.hash.value)] = block.string.data
            out_path = utils.get_path(f"output/TextMap/TextMap{lang}.json")
            os.makedirs(os.path.dirname(out_path), exist_ok=True)
            # TODO: use the ai file to sort like dim instead :/
            output = dict(sorted(output.items(), key=lambda x: int(x[0])))
            with open(out_path, "w", encoding="utf-8") as f:
                json.dump(output, f, indent=4, ensure_ascii=False)
            filesize = os.path.getsize(out_path)
            shutil.rmtree(utils.get_path("bin/MiHoYoBinData"))
            progress.log(Markdown("## Done!"))
            progress.log(Markdown(f"# Entries: {len(output)} | Size: `{round(filesize / 1048576, 2)} MiB`"))

# extract_text_maps(["FR"])
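
For reference, a standalone sketch of the per-file xor pass that the disabled block above attempts. It returns the decoded text instead of rewriting the file in place; the in-place variant still would not produce parseable files, since (as noted above) the encoding and headers are missing.

# sketch of the manual xor pass from the disabled block above
import re

PRINTABLE = re.compile(r"[^\u0020-\u007E]")  # same pattern as the module-level ASCII regex

def xor_decode(path, xor_key):
    xor = int(str(xor_key), 16)  # same hex interpretation as above: 99 -> 0x99 (153)
    with open(path, "rb") as f:
        data = f.read()
    # xor every byte, then strip everything outside printable ASCII
    return PRINTABLE.sub("", "".join(chr(b ^ xor) for b in data))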

helpers/utils.py (new file, 32 lines)

@@ -0,0 +1,32 @@
# utils
import os
import json


def get_path(*path):
    return os.path.join(os.getcwd(), *path)


def get_config(name):
    with open(get_path("configs/vars.json"), "r") as f:
        config = json.load(f)
    return config[name]


def set_config(name, value):
    with open(get_path("configs/vars.json"), "r") as f:
        config = json.load(f)
    config[name] = value
    with open(get_path("configs/vars.json"), "w") as f:
        f.write(json.dumps(config, indent=4))


def get_index_meta(version):
    with open(get_path(f"configs/indexes/index.{version}.meta.json"), "r") as f:
        meta = json.load(f)
    return meta
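
One thing worth noting: get_config and set_config both start by reading configs/vars.json, so that file must already exist (at minimum containing {}) before either is called. A quick usage sketch; the path and version below are placeholders, not values from this commit:

import helpers.utils as utils

utils.set_config("studioPath", "/opt/AssetStudio/AssetStudioCLI")  # placeholder path
utils.set_config("gameTarget", "5.0")                              # placeholder version
print(utils.get_config("gameTarget"))  # -> 5.0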