Skip to content

feat: the C AST used by the C codegen backend #6

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Oct 22, 2024
6 changes: 6 additions & 0 deletions bootstrap/src/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@ impl Run for TestCommand {
cprintln!("<r,s>Test failed</r,s>: {}", info);
}));

cprintln!("<b>[TEST]</b> running cargo test");
let mut command = std::process::Command::new("cargo");
command.args(["test", "--manifest-path", "crates/Cargo.toml"]);
log::debug!("running {:?}", command);
assert!(command.status().unwrap().success(), "failed to run {:?}", command);

let testcases = self.collect_testcases(manifest);
cprintln!("<b>[TEST]</b> found {} testcases", testcases.len());

Expand Down
4 changes: 4 additions & 0 deletions crates/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion crates/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[workspace]
resolver = "2"
members = ["rustc_codegen_c"]
members = ["rustc_codegen_c", "rustc_codegen_c_ast"]

[workspace.package]
version = "0.1.0"
10 changes: 10 additions & 0 deletions crates/rustc_codegen_c_ast/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[package]
name = "rustc_codegen_c_ast"
edition = "2021"
version.workspace = true

[dependencies]

# This package uses rustc crates.
[package.metadata.rust-analyzer]
rustc_private = true
15 changes: 15 additions & 0 deletions crates/rustc_codegen_c_ast/src/arena.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
//! This module defines the memory arena for C AST nodes.

use crate::decl::CDeclKind;
use crate::expr::CExprKind;
use crate::func::CFuncKind;
use crate::stmt::CStmtKind;
use crate::ty::CTyKind;

// One arena entry per C AST node kind defined in this crate
// (declarations, expressions, functions, statements and types).
// NOTE(review): the `'tcx` lifetime name is presumably the one the
// `declare_arena!` macro expects -- confirm against `rustc_arena`.
rustc_arena::declare_arena!([
[] decl: CDeclKind<'tcx>,
[] expr: CExprKind<'tcx>,
[] func: CFuncKind<'tcx>,
[] stmt: CStmtKind<'tcx>,
[] ty: CTyKind<'tcx>,
]);
50 changes: 50 additions & 0 deletions crates/rustc_codegen_c_ast/src/decl.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
//! This module defines AST nodes for C declarations.

use crate::expr::{CExpr, CValue};
use crate::pretty::{Print, PrinterCtx, INDENT};
use crate::ty::{print_declarator, CTy};
use crate::ModuleCtx;

/// C declarations.
///
/// A shared reference, valid for `'mx`, to a [`CDeclKind`] node.
pub type CDecl<'mx> = &'mx CDeclKind<'mx>;

/// C declaration kinds.
#[derive(Debug, Clone)]
pub enum CDeclKind<'mx> {
/// Variable declaration consisting of a name, type, and optional initializer.
///
/// Examples:
/// - `int foo;` // `ty val`
/// - `int foo = bar;` // `ty val = expr`
Var { name: CValue<'mx>, ty: CTy<'mx>, init: Option<CExpr<'mx>> },
}

impl<'mx> ModuleCtx<'mx> {
    /// Allocate `decl` in the module arena and hand back a reference to it.
    pub fn decl(self, decl: CDeclKind<'mx>) -> CDecl<'mx> {
        let arena = self.arena();
        arena.alloc(decl)
    }

    /// Assemble a [`CDeclKind::Var`] from its parts and allocate it through
    /// [`Self::decl`].
    ///
    /// `init` is the optional initializer expression of the variable.
    pub fn var(self, name: CValue<'mx>, ty: CTy<'mx>, init: Option<CExpr<'mx>>) -> CDecl<'mx> {
        let kind = CDeclKind::Var { name, ty, init };
        self.decl(kind)
    }
}

impl Print for CDecl<'_> {
    /// Emit the declaration into `ctx`, always terminated by `;`.
    fn print_to(&self, ctx: &mut PrinterCtx) {
        match self {
            CDeclKind::Var { name, ty, init } => {
                ctx.ibox(INDENT, |printer| {
                    // Type and variable name are rendered together as one declarator.
                    print_declarator(*ty, Some(*name), printer);

                    // The initializer part (` = <expr>`) is only present when one was given.
                    if let Some(value) = init {
                        printer.word(" =");
                        printer.softbreak();
                        value.print_to(printer);
                    }

                    printer.word(";");
                });
            }
        }
    }
}
133 changes: 133 additions & 0 deletions crates/rustc_codegen_c_ast/src/expr.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
//! This module defines the AST nodes for C expressions.

use crate::pretty::{Print, PrinterCtx, INDENT};
use crate::ty::{print_declarator, CTy};
use crate::ModuleCtx;

/// Represents the values of C variables, parameters, and scalars.
///
/// The variants distinguish constants from named values (numbered locals and
/// function names), as is done in LLVM IR. We follow the `rustc_codegen_ssa`
/// convention for this representation.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub enum CValue<'mx> {
/// A constant scalar
Scalar(i128),
/// A local variable indexed by a number, in the form `_0`, `_1`, etc.
Local(usize),
/// A function name
Func(&'mx str),
}

/// C expressions.
///
/// A shared reference, valid for `'mx`, to a [`CExprKind`] node.
pub type CExpr<'mx> = &'mx CExprKind<'mx>;

/// C expression kinds.
#[derive(Debug, Clone)]
pub enum CExprKind<'mx> {
/// A "raw" C expression, simply a string of C code, which is printed as-is.
Raw(&'static str),
/// A value, such as a constant, variable, or function name.
Value(CValue<'mx>),
/// A binary operation expression, e.g. `lhs + rhs`.
///
/// `op` is the operator text placed between the two operands.
Binary { lhs: CExpr<'mx>, rhs: CExpr<'mx>, op: &'static str },
/// A type cast expression, e.g. `(int) x`.
Cast { ty: CTy<'mx>, expr: CExpr<'mx> },
/// A function call expression, e.g. `foo(x, y)`.
Call { callee: CExpr<'mx>, args: Vec<CExpr<'mx>> },
/// A member access expression, e.g. `foo.bar` or `foo->bar`.
Member {
/// The expression whose member is accessed.
expr: CExpr<'mx>,
/// Whether to use the `->` operator instead of `.`.
arrow: bool,
/// Name of the accessed member.
field: &'mx str,
},
}

impl<'mx> ModuleCtx<'mx> {
    /// Allocate `expr` in the module arena and hand back a reference to it.
    ///
    /// Every other constructor in this block funnels through this method.
    pub fn expr(&self, expr: CExprKind<'mx>) -> CExpr<'mx> {
        let arena = self.arena();
        arena.alloc(expr)
    }

    /// Build a [`CExprKind::Raw`] expression from a literal piece of C code.
    pub fn raw(&self, raw: &'static str) -> CExpr<'mx> {
        let kind = CExprKind::Raw(raw);
        self.expr(kind)
    }

    /// Build a [`CExprKind::Value`] expression wrapping `value`.
    pub fn value(&self, value: CValue<'mx>) -> CExpr<'mx> {
        let kind = CExprKind::Value(value);
        self.expr(kind)
    }

    /// Build a [`CExprKind::Binary`] expression `lhs <op> rhs`.
    pub fn binary(&self, lhs: CExpr<'mx>, rhs: CExpr<'mx>, op: &'static str) -> CExpr<'mx> {
        let kind = CExprKind::Binary { lhs, rhs, op };
        self.expr(kind)
    }

    /// Build a [`CExprKind::Cast`] expression converting `expr` to `ty`.
    pub fn cast(&self, ty: CTy<'mx>, expr: CExpr<'mx>) -> CExpr<'mx> {
        let kind = CExprKind::Cast { ty, expr };
        self.expr(kind)
    }

    /// Build a [`CExprKind::Call`] expression invoking `callee` with `args`.
    pub fn call(&self, callee: CExpr<'mx>, args: Vec<CExpr<'mx>>) -> CExpr<'mx> {
        let kind = CExprKind::Call { callee, args };
        self.expr(kind)
    }

    /// Build a [`CExprKind::Member`] expression accessing `field` of `expr`.
    ///
    /// This constructor always sets `arrow` to `false`, i.e. it produces the
    /// `.` form of member access.
    pub fn member(&self, expr: CExpr<'mx>, field: &'mx str) -> CExpr<'mx> {
        let kind = CExprKind::Member { expr, arrow: false, field };
        self.expr(kind)
    }
}

impl Print for CValue<'_> {
    /// Emit the textual form of the value as a single word.
    ///
    /// Scalars print as their decimal number, locals as `_<index>`, and
    /// functions as their name.
    fn print_to(&self, ctx: &mut PrinterCtx) {
        let text = match *self {
            CValue::Scalar(value) => value.to_string(),
            CValue::Local(index) => format!("_{}", index),
            CValue::Func(name) => name.to_string(),
        };
        ctx.word(text)
    }
}

impl Print for CExpr<'_> {
    /// Emit the expression into `ctx`, dispatching on the expression kind.
    fn print_to(&self, ctx: &mut PrinterCtx) {
        match self {
            // Raw code is emitted verbatim.
            CExprKind::Raw(code) => ctx.word(*code),

            CExprKind::Value(val) => val.print_to(ctx),

            // Binary expressions are always wrapped in parentheses.
            CExprKind::Binary { lhs, rhs, op } => {
                ctx.ibox_delim(INDENT, ("(", ")"), 0, |p| {
                    p.ibox(-INDENT, |inner| lhs.print_to(inner));

                    // A line break may occur before the operator, never after it.
                    p.softbreak();
                    p.word(*op);
                    p.nbsp();

                    rhs.print_to(p);
                })
            }

            // `(<type>) <expr>`
            CExprKind::Cast { ty, expr } => {
                ctx.ibox(INDENT, |p| {
                    p.word("(");
                    print_declarator(*ty, None, p);
                    p.word(")");

                    p.nbsp();
                    expr.print_to(p);
                })
            }

            // `<callee>(<arg>, <arg>, ...)`
            CExprKind::Call { callee, args } => {
                ctx.ibox(INDENT, |p| {
                    callee.print_to(p);
                    p.cbox_delim(INDENT, ("(", ")"), 0, |list| {
                        list.seperated(",", args, |item_ctx, arg| arg.print_to(item_ctx));
                    });
                })
            }

            // `<expr>.<field>` or `<expr>-><field>`
            CExprKind::Member { expr, arrow, field } => {
                ctx.cbox(INDENT, |p| {
                    expr.print_to(p);
                    p.zerobreak();
                    let accessor = if *arrow { "->" } else { "." };
                    p.word(accessor);
                    p.word(field.to_string());
                })
            }
        }
    }
}
91 changes: 91 additions & 0 deletions crates/rustc_codegen_c_ast/src/func.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
//! This module defines AST nodes for C functions.

use std::cell::{Cell, RefCell};

use rustc_data_structures::intern::Interned;

use crate::expr::CValue;
use crate::pretty::{Print, PrinterCtx};
use crate::stmt::{print_compound, CStmt};
use crate::ty::{print_declarator, CTy};
use crate::ModuleCtx;

/// C function definition handle: an [`Interned`] reference to a [`CFuncKind`].
pub type CFunc<'mx> = Interned<'mx, CFuncKind<'mx>>;

/// C function definition.
#[derive(Debug, Clone)]
pub struct CFuncKind<'mx> {
/// Function name.
pub name: &'mx str,
/// Return type.
pub ty: CTy<'mx>,
/// Function parameters, each paired with the value that names it.
pub params: Vec<(CTy<'mx>, CValue<'mx>)>,
/// Function body.
///
/// Wrapped in a `RefCell` so statements can be appended through a shared
/// reference (see `push_stmt`).
pub body: RefCell<Vec<CStmt<'mx>>>,
/// A counter for local variables, for generating unique names.
///
/// `new` initializes it to the number of parameters.
local_var_counter: Cell<usize>,
}

impl<'mx> CFuncKind<'mx> {
    /// Make a new function definition with an empty body.
    ///
    /// Each entry of `params` is the type of one parameter; the parameters are
    /// named `CValue::Local(0)`, `CValue::Local(1)`, ... in iteration order.
    /// The local-variable counter starts right after the last parameter, so
    /// values handed out by [`Self::next_local_var`] never collide with
    /// parameter names.
    pub fn new(name: &'mx str, ty: CTy<'mx>, params: impl IntoIterator<Item = CTy<'mx>>) -> Self {
        let params = params
            .into_iter()
            .enumerate()
            .map(|(i, ty)| (ty, CValue::Local(i)))
            .collect::<Vec<_>>();
        let local_var_counter = Cell::new(params.len());

        Self { name, ty, params, body: RefCell::new(Vec::new()), local_var_counter }
    }

    /// Push a statement to the end of the function body.
    ///
    /// # Panics
    ///
    /// Panics if the body is currently borrowed (e.g. while it is being
    /// printed), because the body lives in a `RefCell`.
    pub fn push_stmt(&self, stmt: CStmt<'mx>) {
        self.body.borrow_mut().push(stmt);
    }

    /// Get a new unique local variable.
    ///
    /// Returns `CValue::Local(n)` for the current counter value `n` and then
    /// advances the counter by one.
    ///
    /// The result is typed `CValue<'mx>` rather than being tied to the
    /// (possibly shorter) borrow of `self`, so it can be stored directly in
    /// other `'mx` AST nodes. A `CValue::Local` holds no borrowed data, so any
    /// lifetime is valid for it.
    pub fn next_local_var(&self) -> CValue<'mx> {
        let index = self.local_var_counter.get();
        self.local_var_counter.set(index + 1);
        CValue::Local(index)
    }
}

impl<'mx> ModuleCtx<'mx> {
    /// Allocate the function definition `func` in the module arena and hand
    /// back a reference to it.
    pub fn func(&self, func: CFuncKind<'mx>) -> &'mx CFuncKind<'mx> {
        let arena = self.arena();
        arena.alloc(func)
    }
}

impl Print for CFunc<'_> {
    /// Emit the full function definition: its signature followed by the body
    /// as a compound statement.
    fn print_to(&self, ctx: &mut PrinterCtx) {
        let func = *self;
        ctx.ibox(0, |printer| {
            print_signature(func, printer);
            // Original author's note: it is unclear how to avoid a newline here.
            printer.softbreak();
            let body = func.0.body.borrow();
            print_compound(&body, printer);
        })
    }
}

/// Print a declaration (prototype) of `func`: its signature followed by `;`,
/// without the body.
pub(crate) fn print_func_decl(func: CFunc, ctx: &mut PrinterCtx) {
print_signature(func, ctx);
ctx.word(";");
}

/// Print the signature of `func`: the return type and function name as one
/// declarator, followed by the parenthesized, comma-separated parameter list.
fn print_signature(func: CFunc, ctx: &mut PrinterCtx) {
    let def = func.0;
    ctx.ibox(0, |printer| {
        let fn_name = CValue::Func(def.name);
        print_declarator(def.ty, Some(fn_name), printer);

        printer.valign_delim(("(", ")"), |list| {
            list.seperated(",", &def.params, |item, (param_ty, param_name)| {
                // Each parameter is its own box holding `<type> <name>`.
                item.ibox(0, |p| {
                    print_declarator(*param_ty, Some(*param_name), p);
                })
            })
        });
    });
}
Loading