AST2IR.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403
  1. # %%
  2. # python 3.9
  3. # pip install llvmlite==0.36.0
  4. from json import load
  5. from os import name
  6. import string
  7. from collections import defaultdict
  8. import llvmlite.ir as ir
  9. import llvmlite.binding as llvm
  10. from ctypes import CFUNCTYPE
  11. from dataclasses import dataclass, field
  12. from typing import Union
  13. from enum import Enum
  14. from llvmlite.ir.builder import IRBuilder
  15. from llvmlite.ir.values import Function
  16. llvm.initialize()
  17. llvm.initialize_native_target()
  18. llvm.initialize_native_asmprinter()
  19. ast = load(open("ast.json", encoding="utf8"))
  20. func_ty = ir.FunctionType(ir.VoidType(), [])
  21. f64_ty = ir.DoubleType()
  22. voidptr_ty = ir.IntType(8).as_pointer()
  23. i64_ty = ir.IntType(64)
  24. bool_ty = ir.IntType(1)
  25. printf_ty = ir.FunctionType(ir.IntType(64), [voidptr_ty], var_arg=True)
  26. m = ir.Module()
  27. func = ir.Function(m, func_ty, name="main")
  28. builder = ir.IRBuilder(func.append_basic_block('entry'))
  29. printf = ir.Function(m, printf_ty, name="printf")
  30. fmt = bytearray("> %.8f\n\0".encode('utf-8'))
  31. c_fmt = ir.Constant(ir.ArrayType(ir.IntType(8), len(fmt)), fmt)
  32. global_fmt = ir.GlobalVariable(m, c_fmt.type, name="fstr")
  33. global_fmt.linkage = 'internal'
  34. global_fmt.global_constant = True
  35. global_fmt.initializer = c_fmt
  36. fmt_arg = builder.bitcast(global_fmt, voidptr_ty)
  37. cond_fmt = bytearray("> cond: %d\n\0".encode('utf-8'))
  38. cond_c_fmt = ir.Constant(ir.ArrayType(ir.IntType(8), len(cond_fmt)), cond_fmt)
  39. cond_global_fmt = ir.GlobalVariable(m, cond_c_fmt.type, name="cond_global_fmt")
  40. cond_global_fmt.linkage = 'internal'
  41. cond_global_fmt.global_constant = True
  42. cond_global_fmt.initializer = cond_c_fmt
  43. # op_funcs = {
  44. # "+": builder.fadd, "-": builder.fsub,
  45. # "*": builder.fmul, "/": builder.fdiv,
  46. # }
  47. def op_funcs(builder: ir.IRBuilder):
  48. return {
  49. "+": builder.fadd, "-": builder.fsub,
  50. "*": builder.fmul, "/": builder.fdiv,
  51. }
  52. builtin_func = {
  53. 'output': printf,
  54. }
  55. is_number_str = lambda x: all([i in string.digits + "." for i in x])
  56. get_dict_key = lambda x: list(x.keys())
  57. get_dict_value = lambda x: list(x.values())
  58. class ArgumentType(Enum):
  59. CONST = 1
  60. VAR = 2
  61. @dataclass
  62. class ArgumentData:
  63. name: str
  64. type: ArgumentType
  65. def __repr__(self) -> str:
  66. return self.__str__()
  67. def __str__(self) -> str:
  68. return f"{self.type.name} {self.name}"
  69. def eval_line(builder: IRBuilder, line: dict, vars_envs: dict, func_envs: dict[str, ir.Function]):
  70. if isinstance(line, dict):
  71. name: str = get_dict_key(line)[0]
  72. params: list = get_dict_value(line)[0]
  73. else:
  74. if isinstance(line, ArgumentData):
  75. line: ArgumentData
  76. if line.type == ArgumentType.CONST:
  77. return f64_ty(line.name)
  78. elif line.type == ArgumentType.VAR:
  79. return eval_line(builder, {line.name: []}, vars_envs, func_envs)
  80. else:
  81. print('error line', line, type(line))
  82. return line
  83. if name == 'root':
  84. for arg in params:
  85. eval_line(builder, arg, vars_envs, func_envs)
  86. elif name.startswith('#'):
  87. pass
  88. elif name == '括号':
  89. return eval_line(builder, params[0], vars_envs, func_envs)
  90. elif name == '=':
  91. pass
  92. elif is_number_str(name):
  93. assert not params
  94. var = f64_ty(float(name))
  95. return var
  96. elif name in op_funcs(builder):
  97. assert len(params) == 2
  98. l = eval_line(builder, params[0], vars_envs, func_envs)
  99. r = eval_line(builder, params[1], vars_envs, func_envs)
  100. # print(f'{params=}', l, r, vars_envs, func_envs)
  101. res = op_funcs(builder)[name](l, r)
  102. # print(f"op {l=} {name} {r=} | {res=}")
  103. return res
  104. elif name in builtin_func:
  105. ir_params = []
  106. for para in params:
  107. para = eval_line(builder, para, vars_envs, func_envs)
  108. para = builder.sitofp(para, f64_ty)
  109. ir_params.append(para)
  110. func = builtin_func[name]
  111. call_res = builder.call(func, [fmt_arg, *ir_params])
  112. return call_res
  113. elif name in func_envs and len(params) != 0:
  114. ir_params = []
  115. for para in params:
  116. # print(f'{para=}')
  117. para = eval_line(builder, para, vars_envs, func_envs)
  118. # print(f'{para=}')
  119. ir_params.append(para)
  120. func = func_envs[name]
  121. while len(ir_params) < len(func.args):
  122. ir_params.append(f64_ty(0))
  123. ir_params[0] = builder.fptosi(ir_params[0], i64_ty)
  124. # print('call func', func, ir_params)
  125. return builder.call(func, ir_params)
  126. elif name in vars_envs:
  127. return vars_envs.get(name)
  128. else:
  129. print('cannot find', name)
  130. @dataclass
  131. class FuncData:
  132. name: str
  133. args: list[ArgumentData] # 未扩展的, 扩展只影响 expend_ast_func_param
  134. body: dict
  135. arg_count: int = field(default=0, init=False)
  136. const_arg_count: int = field(default=0, init=False)
  137. var_arg_count: int = field(default=0, init=False)
  138. @staticmethod
  139. def from_ast(ast: dict):
  140. assert '=' in ast, f"{ast} is not a def"
  141. prototype, body = ast['=']
  142. func_name = get_dict_key(prototype)[0]
  143. args = [get_dict_key(i)[0] for i in prototype[func_name]]
  144. args = [
  145. ArgumentData(i, ArgumentType.CONST if is_number_str(i) else ArgumentType.VAR)
  146. for i in args
  147. ]
  148. return FuncData(func_name, args, body)
  149. def __post_init__(self):
  150. for arg in self.args:
  151. self.arg_count += 1
  152. if arg.type == ArgumentType.CONST:
  153. self.const_arg_count += 1
  154. elif arg.type == ArgumentType.VAR:
  155. self.var_arg_count += 1
  156. def func_type(self):
  157. return ir.FunctionType(
  158. f64_ty,
  159. [f64_ty for _ in range(self.arg_count)]
  160. )
  161. def _generate_ll_name(self):
  162. return f"{self.name}-{self.args}"
  163. @staticmethod
  164. def expend_ast_func_param(func_name: str, root: dict):
  165. name: str = get_dict_key(root)[0]
  166. params: list = get_dict_value(root)[0]
  167. if name == '=':
  168. return root
  169. for i, p in enumerate(params):
  170. if not isinstance(p, dict):
  171. continue
  172. params[i] = FuncData.expend_ast_func_param(func_name, p)
  173. if name == func_name and len(params):
  174. params = [i64_ty(len(params)), *params]
  175. return {name: params}
  176. def generate_ll_func(self, m: ir.Module, funcs_dict: dict[str, ir.Function]):
  177. if not hasattr(self, '_current_func'):
  178. current_func = ir.Function(
  179. m, self.func_type(),
  180. f"{self.name}{self.args}"
  181. )
  182. var_envs = {}
  183. # print(f"{current_func=} {current_func.args=}")
  184. for i, arg in enumerate(self.args):
  185. if arg.type == ArgumentType.VAR:
  186. var_envs[arg.name] = current_func.args[i]
  187. bb_entry = current_func.append_basic_block("entry")
  188. current_builder = ir.IRBuilder(bb_entry)
  189. # print(f'{self.body=} {funcs_dict=}')
  190. res = eval_line(current_builder, self.body, var_envs, funcs_dict)
  191. current_builder.ret(res)
  192. self._current_func = current_func
  193. # print(current_func)
  194. return self._current_func
  195. @dataclass
  196. class OverloadableFunc:
  197. name: str
  198. funcs: list[FuncData]
  199. arg_counts: list[int] = field(init=False)
  200. def __post_init__(self):
  201. self.arg_counts = [f.arg_count for f in self.funcs]
  202. def add_func(self, f: FuncData):
  203. self.funcs.append(f)
  204. self.arg_counts = [f.arg_count for f in self.funcs]
  205. def master_func_ty(self):
  206. return ir.FunctionType(
  207. f64_ty,
  208. [i64_ty, *[f64_ty for _ in range(max(self.arg_counts))]]
  209. )
  210. def get_master_func_declear(self):
  211. if not hasattr(self, "master_func_declear"):
  212. self.master_func_declear = ir.Function(
  213. m, self.master_func_ty(),
  214. f"{self.name}_master"
  215. )
  216. return self.master_func_declear
  217. def generate_master_func(self, m: ir.Module, all_ll_master_funcs: dict[str, ir.Function]):
  218. sub_funcs = [
  219. fd.generate_ll_func(m, all_ll_master_funcs)
  220. for fd in self.funcs
  221. ]
  222. master_func = self.get_master_func_declear()
  223. count_var, *master_params = master_func.args
  224. bb_entry = master_func.append_basic_block("entry")
  225. master_entry_builder = ir.IRBuilder(bb_entry)
  226. bb_exit = master_func.append_basic_block('exit')
  227. bb_exit_builder = ir.IRBuilder(bb_exit)
  228. bb_exit_builder.ret(f64_ty(-1))
  229. bb_switch_default = master_func.append_basic_block(name='switch_default')
  230. bb_switch_default_builder = ir.IRBuilder(bb_switch_default)
  231. bb_switch_default_builder.branch(bb_exit)
  232. sw = master_entry_builder.switch(count_var, bb_switch_default)
  233. for count in set(self.arg_counts):
  234. bb_curr_switch = master_func.append_basic_block(
  235. name=self.name + f"_arg_{count}"
  236. )
  237. bb_curr_switch_builder = ir.IRBuilder(bb_curr_switch)
  238. cond_fmt_arg = bb_curr_switch_builder.bitcast(cond_global_fmt, voidptr_ty)
  239. sw.add_case(count, bb_curr_switch)
  240. 同参函数 = [f for f in self.funcs if f.arg_count == count]
  241. 同参函数 = sorted(同参函数, key=lambda x: x.const_arg_count, reverse=True)
  242. for f in 同参函数:
  243. cond = bool_ty(1)
  244. for arg_index, arg in enumerate(f.args):
  245. if arg.type != ArgumentType.CONST:
  246. continue
  247. cond = bb_curr_switch_builder.and_(
  248. cond,
  249. bb_curr_switch_builder.fcmp_unordered(
  250. '==',
  251. master_func.args[1 + arg_index],
  252. ir.Constant(
  253. master_func.args[1 + arg_index].type,
  254. float(arg.name)
  255. ),
  256. name=f'cmp_{arg.name}'
  257. )
  258. )
  259. # print(cond)
  260. # bb_curr_switch_builder.call(printf, [cond_fmt_arg, bb_curr_switch_builder.zext(cond, ir.IntType(32))])
  261. with bb_curr_switch_builder.if_then(cond):
  262. var_envs = {}
  263. # print(f"{current_func=} {current_func.args=}")
  264. for i, arg in enumerate(f.args):
  265. if arg.type == ArgumentType.VAR:
  266. var_envs[arg.name] = master_func.args[i + 1]
  267. # print(f"{f.args=}")
  268. params = [
  269. eval_line(
  270. bb_curr_switch_builder, arg,
  271. var_envs, all_ll_master_funcs
  272. )
  273. for arg in f.args
  274. ]
  275. while len(params) < f.arg_count:
  276. params.append(f64_ty(0))
  277. # print(f"{f}, {params=}")
  278. res = bb_curr_switch_builder.call(
  279. f.generate_ll_func(m, all_ll_master_funcs),
  280. params
  281. )
  282. bb_curr_switch_builder.ret(res)
  283. bb_curr_switch_builder.branch(bb_exit)
  284. # print(master_func)
  285. return master_func
  286. '''
  287. 拿到所有函数后, 按名称分组, 每个名称有一个入口函数叫做master函数, master包含比最大参数多一个参数, 第一个参数用于描述参数数量.
  288. 之后生成master的声明, 然后生成用于重载的函数, 之后按名称过一遍ast, 把对master的调用前面加上一个参数量, 最后拿着这些重载函数补全master的定义.
  289. '''
  290. ast_root = ast
  291. # def create_func(ast_root: dict):
  292. overloadable_func_dict: dict[str, OverloadableFunc] = {}
  293. all_ll_master_funcs: dict[str, ir.Function] = {}
  294. for item in ast_root['root']:
  295. if '=' in item:
  296. f = FuncData.from_ast(item)
  297. f.body = FuncData.expend_ast_func_param(f.name, f.body)
  298. if f.name in overloadable_func_dict:
  299. overloadable_func_dict[f.name].add_func(f)
  300. else:
  301. overloadable_func_dict[f.name] = OverloadableFunc(f.name, [f])
  302. for name, f in overloadable_func_dict.items():
  303. ast_root = FuncData.expend_ast_func_param(name, ast_root)
  304. all_ll_master_funcs[name] = f.get_master_func_declear()
  305. for name in overloadable_func_dict:
  306. # for fd in overloadable_func_dict[name].funcs:
  307. # fd.generate_ll_func(m, all_ll_master_funcs)
  308. (overloadable_func_dict[name].generate_master_func(m, all_ll_master_funcs))
  309. # print(overloadable_func_dict)
  310. # print(ast_root)
  311. # %%
  312. eval_line(builder, ast, {}, all_ll_master_funcs)
  313. builder.ret_void()
  314. asm = (str(m).replace(
  315. "unknown-unknown-unknown",
  316. llvm.Target.from_default_triple().triple
  317. ))
  318. print(asm)
  319. # %%
  320. llvm_module = llvm.parse_assembly(str(m))
  321. tm = llvm.Target.from_default_triple().create_target_machine()
  322. with llvm.create_mcjit_compiler(llvm_module, tm) as ee:
  323. ee.finalize_object()
  324. fptr = ee.get_function_address("main")
  325. py_func = CFUNCTYPE(None)(fptr)
  326. py_func()
  327. # %%