From e9a9f5aaedb4821a8a2bbb5914022378926d076a Mon Sep 17 00:00:00 2001 From: Niko Matsakis Date: Wed, 5 Dec 2012 15:13:24 -0800 Subject: [PATCH] Implement GLB algorithm. (Issue #2263) --- src/librustc/metadata/tyencode.rs | 3 + src/librustc/middle/astencode.rs | 3 +- src/librustc/middle/ty.rs | 33 ++- .../middle/typeck/infer/assignment.rs | 4 +- src/librustc/middle/typeck/infer/combine.rs | 1 + src/librustc/middle/typeck/infer/glb.rs | 118 +++++++- src/librustc/middle/typeck/infer/lattice.rs | 26 ++ src/librustc/middle/typeck/infer/lub.rs | 22 +- src/librustc/middle/typeck/infer/mod.rs | 11 + .../middle/typeck/infer/region_inference.rs | 252 +++++++++++++----- src/librustc/middle/typeck/infer/sub.rs | 1 + src/librustc/middle/typeck/infer/test.rs | 124 ++++++++- src/librustc/util/ppaux.rs | 5 +- src/libsyntax/codemap.rs | 4 + src/libsyntax/diagnostic.rs | 11 +- 15 files changed, 512 insertions(+), 106 deletions(-) diff --git a/src/librustc/metadata/tyencode.rs b/src/librustc/metadata/tyencode.rs index d4abf099e6ec6..2616a10cfa316 100644 --- a/src/librustc/metadata/tyencode.rs +++ b/src/librustc/metadata/tyencode.rs @@ -183,6 +183,9 @@ fn enc_bound_region(w: io::Writer, cx: @ctxt, br: ty::bound_region) { w.write_char('|'); enc_bound_region(w, cx, *br); } + ty::br_fresh(id) => { + w.write_uint(id); + } } } diff --git a/src/librustc/middle/astencode.rs b/src/librustc/middle/astencode.rs index ea018bd528e20..8ec1297de15bd 100644 --- a/src/librustc/middle/astencode.rs +++ b/src/librustc/middle/astencode.rs @@ -416,7 +416,8 @@ impl ty::Region: tr { impl ty::bound_region: tr { fn tr(xcx: extended_decode_ctxt) -> ty::bound_region { match self { - ty::br_anon(_) | ty::br_named(_) | ty::br_self => self, + ty::br_anon(_) | ty::br_named(_) | ty::br_self | + ty::br_fresh(_) => self, ty::br_cap_avoid(id, br) => ty::br_cap_avoid(xcx.tr_id(id), @br.tr(xcx)) } diff --git a/src/librustc/middle/ty.rs b/src/librustc/middle/ty.rs index 1ef91bfda880e..c281da43b222d 100644 --- a/src/librustc/middle/ty.rs +++ b/src/librustc/middle/ty.rs @@ -53,7 +53,7 @@ export expr_ty_params_and_ty; export expr_is_lval, expr_kind; export ExprKind, LvalueExpr, RvalueDatumExpr, RvalueDpsExpr, RvalueStmtExpr; export field_ty; -export fold_ty, fold_sty_to_ty, fold_region, fold_regions; +export fold_ty, fold_sty_to_ty, fold_region, fold_regions, fold_sig; export apply_op_on_t_to_ty_fn; export fold_regions_and_ty, walk_regions_and_ty; export field; @@ -126,7 +126,7 @@ export ty_struct; export Region, bound_region, encl_region; export re_bound, re_free, re_scope, re_static, re_infer; export ReVar, ReSkolemized; -export br_self, br_anon, br_named, br_cap_avoid; +export br_self, br_anon, br_named, br_cap_avoid, br_fresh; export get, type_has_params, type_needs_infer, type_has_regions; export type_is_region_ptr; export type_id; @@ -585,6 +585,9 @@ enum bound_region { /// Named region parameters for functions (a in &a/T) br_named(ast::ident), + /// Fresh bound identifiers created during GLB computations. + br_fresh(uint), + /** * Handles capture-avoiding substitution in a rather subtle case. If you * have a closure whose argument types are being inferred based on the @@ -1285,6 +1288,17 @@ fn fold_sty_to_ty(tcx: ty::ctxt, sty: &sty, foldop: fn(t) -> t) -> t { mk_t(tcx, fold_sty(sty, foldop)) } +fn fold_sig(sig: &FnSig, fldop: fn(t) -> t) -> FnSig { + let args = do sig.inputs.map |arg| { + { mode: arg.mode, ty: fldop(arg.ty) } + }; + + FnSig { + inputs: move args, + output: fldop(sig.output) + } +} + fn fold_sty(sty: &sty, fldop: fn(t) -> t) -> sty { fn fold_substs(substs: &substs, fldop: fn(t) -> t) -> substs { {self_r: substs.self_r, @@ -1463,8 +1477,8 @@ fn apply_op_on_t_to_ty_fn( fn fold_regions( cx: ctxt, ty: t, - fldr: fn(r: Region, in_fn: bool) -> Region) -> t { - + fldr: fn(r: Region, in_fn: bool) -> Region) -> t +{ fn do_fold(cx: ctxt, ty: t, in_fn: bool, fldr: fn(Region, bool) -> Region) -> t { if !type_has_regions(ty) { return ty; } @@ -2690,7 +2704,10 @@ impl bound_region : to_bytes::IterBytes { to_bytes::iter_bytes_2(&2u8, ident, lsb0, f), ty::br_cap_avoid(ref id, ref br) => - to_bytes::iter_bytes_3(&3u8, id, br, lsb0, f) + to_bytes::iter_bytes_3(&3u8, id, br, lsb0, f), + + ty::br_fresh(ref x) => + to_bytes::iter_bytes_2(&4u8, x, lsb0, f) } } } @@ -4452,6 +4469,12 @@ impl bound_region : cmp::Eq { _ => false } } + br_fresh(e0a) => { + match (*other) { + br_fresh(e0b) => e0a == e0b, + _ => false + } + } } } pure fn ne(&self, other: &bound_region) -> bool { !(*self).eq(other) } diff --git a/src/librustc/middle/typeck/infer/assignment.rs b/src/librustc/middle/typeck/infer/assignment.rs index 2ceff545eb73a..2b1c103b9600d 100644 --- a/src/librustc/middle/typeck/infer/assignment.rs +++ b/src/librustc/middle/typeck/infer/assignment.rs @@ -75,7 +75,7 @@ enum Assign = combine_fields; impl Assign { fn tys(a: ty::t, b: ty::t) -> ares { - debug!("Assign.tys(%s -> %s)", + debug!("Assign.tys(%s => %s)", a.to_str(self.infcx), b.to_str(self.infcx)); let _r = indenter(); @@ -124,7 +124,7 @@ priv impl Assign { a: ty::t, b: ty::t, +a_bnd: Option, +b_bnd: Option) -> ares { - debug!("Assign.assign_tys_or_sub(%s -> %s, %s -> %s)", + debug!("Assign.assign_tys_or_sub(%s => %s, %s => %s)", a.to_str(self.infcx), b.to_str(self.infcx), a_bnd.to_str(self.infcx), b_bnd.to_str(self.infcx)); let _r = indenter(); diff --git a/src/librustc/middle/typeck/infer/combine.rs b/src/librustc/middle/typeck/infer/combine.rs index 1972be22b02ff..50d449cdca404 100644 --- a/src/librustc/middle/typeck/infer/combine.rs +++ b/src/librustc/middle/typeck/infer/combine.rs @@ -64,6 +64,7 @@ trait combine { fn infcx() -> infer_ctxt; fn tag() -> ~str; fn a_is_expected() -> bool; + fn span() -> span; fn sub() -> Sub; fn lub() -> Lub; diff --git a/src/librustc/middle/typeck/infer/glb.rs b/src/librustc/middle/typeck/infer/glb.rs index 4d00b5f7d3c1d..4ced7249648b7 100644 --- a/src/librustc/middle/typeck/infer/glb.rs +++ b/src/librustc/middle/typeck/infer/glb.rs @@ -19,6 +19,7 @@ impl Glb: combine { fn infcx() -> infer_ctxt { self.infcx } fn tag() -> ~str { ~"glb" } fn a_is_expected() -> bool { self.a_is_expected } + fn span() -> span { self.span } fn sub() -> Sub { Sub(*self) } fn lub() -> Lub { Lub(*self) } @@ -152,7 +153,122 @@ impl Glb: combine { } fn fns(a: &ty::FnTy, b: &ty::FnTy) -> cres { - super_fns(&self, a, b) + // Note: this is a subtle algorithm. For a full explanation, + // please see the large comment in `region_inference.rs`. + + debug!("%s.fns(%?, %?)", + self.tag(), a.to_str(self.infcx), b.to_str(self.infcx)); + let _indenter = indenter(); + + // Take a snapshot. We'll never roll this back, but in later + // phases we do want to be able to examine "all bindings that + // were created as part of this type comparison", and making a + // snapshot is a convenient way to do that. + let snapshot = self.infcx.region_vars.start_snapshot(); + + // Instantiate each bound region with a fresh region variable. + let (a_with_fresh, a_isr) = + self.infcx.replace_bound_regions_with_fresh_regions( + self.span, a); + let a_vars = var_ids(&self, a_isr); + let (b_with_fresh, b_isr) = + self.infcx.replace_bound_regions_with_fresh_regions( + self.span, b); + let b_vars = var_ids(&self, b_isr); + + // Collect constraints. + let fn_ty0 = if_ok!(super_fns(&self, &a_with_fresh, &b_with_fresh)); + debug!("fn_ty0 = %s", fn_ty0.to_str(self.infcx)); + + // Generalize the regions appearing in fn_ty0 if possible + let new_vars = + self.infcx.region_vars.vars_created_since_snapshot(snapshot); + let fn_ty1 = + self.infcx.fold_regions_in_sig( + &fn_ty0, + |r, _in_fn| generalize_region(&self, snapshot, + new_vars, a_isr, a_vars, b_vars, + r)); + debug!("fn_ty1 = %s", fn_ty1.to_str(self.infcx)); + return Ok(move fn_ty1); + + fn generalize_region(self: &Glb, + snapshot: uint, + new_vars: &[RegionVid], + a_isr: isr_alist, + a_vars: &[RegionVid], + b_vars: &[RegionVid], + r0: ty::Region) -> ty::Region { + if !is_var_in_set(new_vars, r0) { + return r0; + } + + let tainted = self.infcx.region_vars.tainted(snapshot, r0); + + let mut a_r = None, b_r = None, only_new_vars = true; + for tainted.each |r| { + if is_var_in_set(a_vars, *r) { + if a_r.is_some() { + return fresh_bound_variable(self); + } else { + a_r = Some(*r); + } + } else if is_var_in_set(b_vars, *r) { + if b_r.is_some() { + return fresh_bound_variable(self); + } else { + b_r = Some(*r); + } + } else if !is_var_in_set(new_vars, *r) { + only_new_vars = false; + } + } + + // NB---I do not believe this algorithm computes + // (necessarily) the GLB. As written it can + // spuriously fail. In particular, if there is a case + // like: fn(fn(&a)) and fn(fn(&b)), where a and b are + // free, it will return fn(&c) where c = GLB(a,b). If + // however this GLB is not defined, then the result is + // an error, even though something like + // "fn(fn(&X))" where X is bound would be a + // subtype of both of those. + // + // The problem is that if we were to return a bound + // variable, we'd be computing a lower-bound, but not + // necessarily the *greatest* lower-bound. + + if a_r.is_some() && b_r.is_some() && only_new_vars { + // Related to exactly one bound variable from each fn: + return rev_lookup(self, a_isr, a_r.get()); + } else if a_r.is_none() && b_r.is_none() { + // Not related to bound variables from either fn: + return r0; + } else { + // Other: + return fresh_bound_variable(self); + } + } + + fn rev_lookup(self: &Glb, + a_isr: isr_alist, + r: ty::Region) -> ty::Region + { + for list::each(a_isr) |pair| { + let (a_br, a_r) = *pair; + if a_r == r { + return ty::re_bound(a_br); + } + } + + self.infcx.tcx.sess.span_bug( + self.span, + fmt!("could not find original bound region for %?", r)); + } + + fn fresh_bound_variable(self: &Glb) -> ty::Region { + self.infcx.region_vars.new_bound() + } } fn fn_metas(a: &ty::FnMeta, b: &ty::FnMeta) -> cres { diff --git a/src/librustc/middle/typeck/infer/lattice.rs b/src/librustc/middle/typeck/infer/lattice.rs index fba0c5650040e..f31ae2af71f9a 100644 --- a/src/librustc/middle/typeck/infer/lattice.rs +++ b/src/librustc/middle/typeck/infer/lattice.rs @@ -156,3 +156,29 @@ fn lattice_var_and_t( } } } + +// ___________________________________________________________________________ +// Random utility functions used by LUB/GLB when computing LUB/GLB of +// fn types + +fn var_ids(self: &T, isr: isr_alist) -> ~[RegionVid] { + let mut result = ~[]; + for list::each(isr) |pair| { + match pair.second() { + ty::re_infer(ty::ReVar(r)) => { result.push(r); } + r => { + self.infcx().tcx.sess.span_bug( + self.span(), + fmt!("Found non-region-vid: %?", r)); + } + } + } + return result; +} + +fn is_var_in_set(new_vars: &[RegionVid], r: ty::Region) -> bool { + match r { + ty::re_infer(ty::ReVar(ref v)) => new_vars.contains(v), + _ => false + } +} diff --git a/src/librustc/middle/typeck/infer/lub.rs b/src/librustc/middle/typeck/infer/lub.rs index 563435d4600ee..65d4957e5b43a 100644 --- a/src/librustc/middle/typeck/infer/lub.rs +++ b/src/librustc/middle/typeck/infer/lub.rs @@ -26,6 +26,7 @@ impl Lub: combine { fn infcx() -> infer_ctxt { self.infcx } fn tag() -> ~str { ~"lub" } fn a_is_expected() -> bool { self.a_is_expected } + fn span() -> span { self.span } fn sub() -> Sub { Sub(*self) } fn lub() -> Lub { Lub(*self) } @@ -142,12 +143,10 @@ impl Lub: combine { let new_vars = self.infcx.region_vars.vars_created_since_snapshot(snapshot); let fn_ty1 = - ty::apply_op_on_t_to_ty_fn( - self.infcx.tcx, &fn_ty0, - |t| ty::fold_regions( - self.infcx.tcx, t, - |r, _in_fn| generalize_region(&self, snapshot, - new_vars, a_isr, r))); + self.infcx.fold_regions_in_sig( + &fn_ty0, + |r, _in_fn| generalize_region(&self, snapshot, new_vars, + a_isr, r)); return Ok(move fn_ty1); fn generalize_region(self: &Lub, @@ -156,7 +155,7 @@ impl Lub: combine { a_isr: isr_alist, r0: ty::Region) -> ty::Region { // Regions that pre-dated the LUB computation stay as they are. - if !is_new_var(new_vars, r0) { + if !is_var_in_set(new_vars, r0) { debug!("generalize_region(r0=%?): not new variable", r0); return r0; } @@ -166,7 +165,7 @@ impl Lub: combine { // Variables created during LUB computation which are // *related* to regions that pre-date the LUB computation // stay as they are. - if !tainted.all(|r| is_new_var(new_vars, *r)) { + if !tainted.all(|r| is_var_in_set(new_vars, *r)) { debug!("generalize_region(r0=%?): \ non-new-variables found in %?", r0, tainted); @@ -193,13 +192,6 @@ impl Lub: combine { fmt!("Region %? is not associated with \ any bound region from A!", r0)); } - - fn is_new_var(new_vars: &[RegionVid], r: ty::Region) -> bool { - match r { - ty::re_infer(ty::ReVar(ref v)) => new_vars.contains(v), - _ => false - } - } } fn fn_metas(a: &ty::FnMeta, b: &ty::FnMeta) -> cres { diff --git a/src/librustc/middle/typeck/infer/mod.rs b/src/librustc/middle/typeck/infer/mod.rs index 0d0a1d761cc14..84c4654625b49 100644 --- a/src/librustc/middle/typeck/infer/mod.rs +++ b/src/librustc/middle/typeck/infer/mod.rs @@ -776,5 +776,16 @@ impl infer_ctxt { (fn_ty, isr) } + fn fold_regions_in_sig( + &self, + fn_ty: &ty::FnTy, + fldr: &fn(r: ty::Region, in_fn: bool) -> ty::Region) -> ty::FnTy + { + let sig = do ty::fold_sig(&fn_ty.sig) |t| { + ty::fold_regions(self.tcx, t, fldr) + }; + ty::FnTyBase {meta: fn_ty.meta, sig: sig} + } + } diff --git a/src/librustc/middle/typeck/infer/region_inference.rs b/src/librustc/middle/typeck/infer/region_inference.rs index f49d174001869..b8a7f9209b048 100644 --- a/src/librustc/middle/typeck/infer/region_inference.rs +++ b/src/librustc/middle/typeck/infer/region_inference.rs @@ -337,6 +337,61 @@ therefore considering subtyping and in particular does not consider LUB or GLB computation. We have to consider this. Here is the algorithm I implemented. +First though, let's discuss what we are trying to compute in more +detail. The LUB is basically the "common supertype" and the GLB is +"common subtype"; one catch is that the LUB should be the +*most-specific* common supertype and the GLB should be *most general* +common subtype (as opposed to any common supertype or any common +subtype). + +Anyway, to help clarify, here is a table containing some +function pairs and their LUB/GLB: + +``` +Type 1 Type 2 LUB GLB +fn(&a) fn(&X) fn(&X) fn(&a) +fn(&A) fn(&X) -- fn(&a) +fn(&a, &b) fn(&x, &x) fn(&a, &a) fn(&a, &b) +fn(&a, &b, &a) fn(&x, &y, &y) fn(&a, &a, &a) fn(&a,&b,&c) +``` + +### Conventions + +I use lower-case letters (e.g., `&a`) for bound regions and upper-case +letters for free regions (`&A`). Region variables written with a +dollar-sign (e.g., `$a`). I will try to remember to enumerate the +bound-regions on the fn type as well (e.g., `fn(&a)`). + +### High-level summary + +Both the LUB and the GLB algorithms work in a similar fashion. They +begin by replacing all bound regions (on both sides) with fresh region +inference variables. Therefore, both functions are converted to types +that contain only free regions. We can then compute the LUB/GLB in a +straightforward way, as described in `combine.rs`. This results in an +interim type T. The algorithms then examine the regions that appear +in T and try to, in some cases, replace them with bound regions to +yield the final result. + +To decide whether to replace a region `R` that appears in `T` with a +bound region, the algorithms make use of two bits of information. +First is a set `V` that contains all region variables created as part +of the LUB/GLB computation. `V` will contain the region variables +created to replace the bound regions in the input types, but it also +contains 'intermediate' variables created to represent the LUB/GLB of +individual regions. Basically, when asked to compute the LUB/GLB of a +region variable with another region, the inferencer cannot oblige +immediately since the valuese of that variables are not known. +Therefore, it creates a new variable that is related to the two +regions. For example, the LUB of two variables `$x` and `$y` is a +fresh variable `$z` that is constrained such that `$x <= $z` and `$y +<= $z`. So `V` will contain these intermediate variables as well. + +The other important factor in deciding how to replace a region in T is +the function `Tainted($r)` which, for a region variable, identifies +all regions that the region variable is related to in some way +(`Tainted()` made an appearance in the subtype computation as well). + ### LUB The LUB algorithm proceeds in three steps: @@ -345,47 +400,28 @@ The LUB algorithm proceeds in three steps: inference variables. 2. Compute the LUB "as normal", meaning compute the GLB of each pair of argument types and the LUB of the return types and - so forth. Combine those to a new function type F. -3. Map the regions appearing in `F` using the procedure described below. - -For each region `R` that appears in `F`, we may need to replace it -with a bound region. Let `V` be the set of fresh variables created as -part of the LUB procedure (either in step 1 or step 2). You may be -wondering how variables can be created in step 2. The answer is that -when we are asked to compute the LUB or GLB of two region variables, -we do so by producing a new region variable that is related to those -two variables. i.e., The LUB of two variables `$x` and `$y` is a -fresh variable `$z` that is constrained such that `$x <= $z` and `$y -<= $z`. - -To decide how to replace a region `R`, we must examine `Tainted(R)`. -This function searches through the constraints which were generated -when computing the bounds of all the argument and return types and -produces a list of all regions to which `R` is related, directly or -indirectly. - -If `R` is not in `V` or `Tainted(R)` contains any region that is not -in `V`, then `R` is not replaced (that is, `R` is mapped to itself). -Otherwise, if `Tainted(R)` is a subset of `V`, then we select the -earliest variable in `Tainted(R)` that originates from the left-hand -side and replace `R` with a bound version of that variable. - -So, let's work through the simplest example: `fn(&A)` and `fn(&a)`. -In this case, `&a` will be replaced with `$a` (the $ indicates an -inference variable) which will be linked to the free region `&A`, and -hence `V = { $a }` and `Tainted($a) = { &A }`. Since `$a` is not a -member of `V`, we leave `$a` as is. When region inference happens, -`$a` will be resolved to `&A`, as we wanted. - -So, let's work through the simplest example: `fn(&A)` and `fn(&a)`. -In this case, `&a` will be replaced with `$a` (the $ indicates an -inference variable) which will be linked to the free region `&A`, and -hence `V = { $a }` and `Tainted($a) = { $a, &A }`. Since `&A` is not a -member of `V`, we leave `$a` as is. When region inference happens, -`$a` will be resolved to `&A`, as we wanted. - -Let's look at a more complex one: `fn(&a, &b)` and `fn(&x, &x)`. -In this case, we'll end up with a graph that looks like: + so forth. Combine those to a new function type `F`. +3. Replace each region `R` that appears in `F` as follows: + - Let `V` be the set of variables created during the LUB + computational steps 1 and 2, as described in the previous section. + - If `R` is not in `V`, replace `R` with itself. + - If `Tainted(R)` contains a region that is not in `V`, + replace `R` with itself. + - Otherwise, select the earliest variable in `Tainted(R)` that originates + from the left-hand side and replace `R` with the bound region that + this variable was a replacement for. + +So, let's work through the simplest example: `fn(&A)` and `fn(&a)`. +In this case, `&a` will be replaced with `$a` and the interim LUB type +`fn($b)` will be computed, where `$b=GLB(&A,$a)`. Therefore, `V = +{$a, $b}` and `Tainted($b) = { $b, $a, &A }`. When we go to replace +`$b`, we find that since `&A \in Tainted($b)` is not a member of `V`, +we leave `$b` as is. When region inference happens, `$b` will be +resolved to `&A`, as we wanted. + +Let's look at a more complex one: `fn(&a, &b)` and `fn(&x, &x)`. In +this case, we'll end up with a (pre-replacement) LUB type of `fn(&g, +&h)` and a graph that looks like: ``` $a $b *--$x @@ -399,8 +435,8 @@ the LUB/GLB of things requiring inference. This means that `V` and `Tainted` will look like: ``` -V = {$a, $b, $x} -Tainted($g) = Tainted($h) = { $a, $b, $h, $x } +V = {$a, $b, $g, $h, $x} +Tainted($g) = Tainted($h) = { $a, $b, $h, $g, $x } ``` Therefore we replace both `$g` and `$h` with `$a`, and end up @@ -413,40 +449,90 @@ in computing the replacements for the various variables. For each region `R` that appears in the type `F`, we again compute `Tainted(R)` and examine the results: -1. If `Tainted(R) = {R}` is a singleton set, replace `R` with itself. +1. If `R` is not in `V`, it is not replaced. 2. Else, if `Tainted(R)` contains only variables in `V`, and it contains exactly one variable from the LHS and one variable from the RHS, then `R` can be mapped to the bound version of the variable from the LHS. -3. Else, `R` is mapped to a fresh bound variable. +3. Else, if `Tainted(R)` contains no variable from the LHS and no + variable from the RHS, then `R` can be mapped to itself. +4. Else, `R` is mapped to a fresh bound variable. These rules are pretty complex. Let's look at some examples to see how they play out. -Out first example was `fn(&a)` and `fn(&X)`---in -this case, the LUB will be a variable `$g`, and `Tainted($g) = -{$g,$a,$x}`. By these rules, we'll replace `$g` with a fresh bound -variable, so the result is `fn(&z)`, which is fine. - -The next example is `fn(&A)` and `fn(&Z)`. XXX - -The next example is `fn(&a, &b)` and `fn(&x, &x)`. In this case, as -before, we'll end up with `F=fn(&g, &h)` where `Tainted($g) = -Tainted($h) = {$g, $a, $b, $x}`. This means that we'll select fresh -bound varibales `g` and `h` and wind up with `fn(&g, &h)`. +Out first example was `fn(&a)` and `fn(&X)`. In this case, `&a` will +be replaced with `$a` and we will ultimately compute a +(pre-replacement) GLB type of `fn($g)` where `$g=LUB($a,&X)`. +Therefore, `V={$a,$g}` and `Tainted($g)={$g,$a,&X}. To find the +replacement for `$g` we consult the rules above: +- Rule (1) does not apply because `$g \in V` +- Rule (2) does not apply because `&X \in Tainted($g)` +- Rule (3) does not apply because `$a \in Tainted($g)` +- Hence, by rule (4), we replace `$g` with a fresh bound variable `&z`. +So our final result is `fn(&z)`, which is correct. + +The next example is `fn(&A)` and `fn(&Z)`. In this case, we will again +have a (pre-replacement) GLB of `fn(&g)`, where `$g = LUB(&A,&Z)`. +Therefore, `V={$g}` and `Tainted($g) = {$g, &A, &Z}`. In this case, +by rule (3), `$g` is mapped to itself, and hence the result is +`fn($g)`. This result is correct (in this case, at least), but it is +indicative of a case that *can* lead us into concluding that there is +no GLB when in fact a GLB does exist. See the section "Questionable +Results" below for more details. + +The next example is `fn(&a, &b)` and `fn(&c, &c)`. In this case, as +before, we'll end up with `F=fn($g, $h)` where `Tainted($g) = +Tainted($h) = {$g, $h, $a, $b, $c}`. Only rule (4) applies and hence +we'll select fresh bound variables `y` and `z` and wind up with +`fn(&y, &z)`. For the last example, let's consider what may seem trivial, but is -not: `fn(&a, &a)` and `fn(&x, &x)`. In this case, we'll get `F=fn(&g, -&h)` where `Tainted($g) = {$g, $a, $x}` and `Tainted($h) = {$h, $a, +not: `fn(&a, &a)` and `fn(&b, &b)`. In this case, we'll get `F=fn($g, +$h)` where `Tainted($g) = {$g, $a, $x}` and `Tainted($h) = {$h, $a, $x}`. Both of these sets contain exactly one bound variable from each -side, so we'll map them both to `&a`, resulting in `fn(&a, &a)`. -Horray! - -### Why are these correct? - -You may be wondering whether this algorithm is correct. So am I. But -I believe it is. (Justification forthcoming, haven't had time to -write it) +side, so we'll map them both to `&a`, resulting in `fn(&a, &a)`, which +is the desired result. + +### Shortcomings and correctness + +You may be wondering whether this algorithm is correct. The answer is +"sort of". There are definitely cases where they fail to compute a +result even though a correct result exists. I believe, though, that +if they succeed, then the result is valid, and I will attempt to +convince you. The basic argument is that the "pre-replacement" step +computes a set of constraints. The replacements, then, attempt to +satisfy those constraints, using bound identifiers where needed. + +For now I will briefly go over the cases for LUB/GLB and identify +their intent: + +- LUB: + - The region variables that are substituted in place of bound regions + are intended to collect constraints on those bound regions. + - If Tainted(R) contains only values in V, then this region is unconstrained + and can therefore be generalized, otherwise it cannot. +- GLB: + - The region variables that are substituted in place of bound regions + are intended to collect constraints on those bound regions. + - If Tainted(R) contains exactly one variable from each side, and + only variables in V, that indicates that those two bound regions + must be equated. + - Otherwise, if Tainted(R) references any variables from left or right + side, then it is trying to combine a bound region with a free one or + multiple bound regions, so we need to select fresh bound regions. + +Sorry this is more of a shorthand to myself. I will try to write up something +more convincing in the future. + +#### Where are the algorithms wrong? + +- The pre-replacement computation can fail even though using a + bound-region would have succeeded. +- We will compute GLB(fn(fn($a)), fn(fn($b))) as fn($c) where $c is the + GLB of $a and $b. But if inference finds that $a and $b must be mapped + to regions without a GLB, then this is effectively a failure to compute + the GLB. However, the result `fn<$c>(fn($c))` is a valid GLB. */ @@ -462,7 +548,7 @@ use std::list::{List, Nil, Cons}; use region::is_subregion_of; use ty::{Region, RegionVid, re_static, re_infer, re_free, re_bound, - re_scope, ReVar, ReSkolemized}; + re_scope, ReVar, ReSkolemized, br_fresh}; use syntax::codemap; use to_str::ToStr; use util::ppaux::note_and_explain_region; @@ -547,6 +633,7 @@ struct RegionVarBindings { lubs: CombineMap, glbs: CombineMap, mut skolemization_count: uint, + mut bound_count: uint, // The undo log records actions that might later be undone. // @@ -573,6 +660,7 @@ fn RegionVarBindings(tcx: ty::ctxt) -> RegionVarBindings { lubs: CombineMap(), glbs: CombineMap(), skolemization_count: 0, + bound_count: 0, undo_log: ~[] } } @@ -649,6 +737,26 @@ impl RegionVarBindings { re_infer(ReSkolemized(sc, br)) } + fn new_bound(&self) -> Region { + // Creates a fresh bound variable for use in GLB computations. + // See discussion of GLB computation in the large comment at + // the top of this file for more details. + // + // This computation is mildly wrong in the face of rollover. + // It's conceivable, if unlikely, that one might wind up with + // accidental capture for nested functions in that case, if + // the outer function had bound regions created a very long + // time before and the inner function somehow wound up rolling + // over such that supposedly fresh identifiers were in fact + // shadowed. We should convert our bound_region + // representation to use deBruijn indices or something like + // that to eliminate that possibility. + + let sc = self.bound_count; + self.bound_count += 1; + re_bound(br_fresh(sc)) + } + fn add_constraint(&self, +constraint: Constraint, span: span) { // cannot add constraints once regions are resolved assert self.values.is_empty(); @@ -681,6 +789,16 @@ impl RegionVarBindings { self.add_constraint(ConstrainVarSubReg(sub_id, r), span); Ok(()) } + (re_bound(br), _) => { + self.tcx.sess.span_bug( + span, + fmt!("Cannot relate bound region as subregion: %?", br)); + } + (_, re_bound(br)) => { + self.tcx.sess.span_bug( + span, + fmt!("Cannot relate bound region as superregion: %?", br)); + } _ => { if self.is_subregion_of(sub, sup) { Ok(()) diff --git a/src/librustc/middle/typeck/infer/sub.rs b/src/librustc/middle/typeck/infer/sub.rs index b7769ac876d1b..cd4aa240e3252 100644 --- a/src/librustc/middle/typeck/infer/sub.rs +++ b/src/librustc/middle/typeck/infer/sub.rs @@ -21,6 +21,7 @@ impl Sub: combine { fn infcx() -> infer_ctxt { self.infcx } fn tag() -> ~str { ~"sub" } fn a_is_expected() -> bool { self.a_is_expected } + fn span() -> span { self.span } fn sub() -> Sub { Sub(*self) } fn lub() -> Lub { Lub(*self) } diff --git a/src/librustc/middle/typeck/infer/test.rs b/src/librustc/middle/typeck/infer/test.rs index c585b45e7ac00..fdc8f9906c96b 100644 --- a/src/librustc/middle/typeck/infer/test.rs +++ b/src/librustc/middle/typeck/infer/test.rs @@ -16,6 +16,7 @@ Note: This module is only compiled when doing unit testing. */ +use dvec::DVec; use std::getopts; use std::map::HashMap; use std::getopts; @@ -35,7 +36,8 @@ use middle::ty::{FnTyBase, FnMeta, FnSig}; struct Env { crate: @ast::crate, tcx: ty::ctxt, - infcx: infer::infer_ctxt + infcx: infer::infer_ctxt, + err_messages: @DVec<~str> } struct RH { @@ -43,10 +45,14 @@ struct RH { sub: &[RH] } +const EMPTY_SOURCE_STR: &str = "/* Hello, world! */"; + fn setup_env(test_name: &str, source_string: &str) -> Env { - let matches = &getopts(~[~"-Z", ~"verbose"], optgroups()).get(); - let sessopts = build_session_options(~"rustc", matches, diagnostic::emit); - let sess = build_session(sessopts, diagnostic::emit); + let messages = @DVec(); + let matches = getopts(~[~"-Z", ~"verbose"], optgroups()).get(); + let diag = diagnostic::collect(messages); + let sessopts = build_session_options(~"rustc", &matches, diag); + let sess = build_session(sessopts, diag); let cfg = build_configuration(sess, ~"whatever", str_input(~"")); let dm = HashMap(); let amap = HashMap(); @@ -65,7 +71,10 @@ fn setup_env(test_name: &str, source_string: &str) -> Env { let infcx = infer::new_infer_ctxt(tcx); - return Env { crate: crate, tcx: tcx, infcx: infcx }; + return Env {crate: crate, + tcx: tcx, + infcx: infcx, + err_messages: messages}; } impl Env { @@ -210,6 +219,22 @@ impl Env { fn lub() -> Lub { Lub(self.infcx.combine_fields(true, dummy_sp())) } + fn glb() -> Glb { Glb(self.infcx.combine_fields(true, dummy_sp())) } + + fn resolve_regions(exp_count: uint) { + debug!("resolve_regions(%u)", exp_count); + + self.infcx.resolve_regions(); + if self.err_messages.len() != exp_count { + for self.err_messages.each |msg| { + debug!("Error encountered: %s", *msg); + } + fmt!("Resolving regions encountered %u errors but expected %u!", + self.err_messages.len(), + exp_count); + } + } + /// Checks that `LUB(t1,t2) == t_lub` fn check_lub(&self, t1: ty::t, t2: ty::t, t_lub: ty::t) { match self.lub().tys(t1, t2) { @@ -222,6 +247,30 @@ impl Env { // sanity check for good measure: self.assert_subtype(t1, t); self.assert_subtype(t2, t); + + self.resolve_regions(0); + } + } + } + + /// Checks that `GLB(t1,t2) == t_glb` + fn check_glb(&self, t1: ty::t, t2: ty::t, t_glb: ty::t) { + debug!("check_glb(t1=%s, t2=%s, t_glb=%s)", + self.ty_to_str(t1), + self.ty_to_str(t2), + self.ty_to_str(t_glb)); + match self.glb().tys(t1, t2) { + Err(e) => { + fail fmt!("Unexpected error computing LUB: %?", e) + } + Ok(t) => { + self.assert_eq(t, t_glb); + + // sanity check for good measure: + self.assert_subtype(t, t1); + self.assert_subtype(t, t2); + + self.resolve_regions(0); } } } @@ -236,11 +285,22 @@ impl Env { } } } + + /// Checks that `GLB(t1,t2)` is undefined + fn check_no_glb(&self, t1: ty::t, t2: ty::t) { + match self.glb().tys(t1, t2) { + Err(_) => {} + Ok(t) => { + fail fmt!("Unexpected success computing GLB: %?", + self.ty_to_str(t)) + } + } + } } #[test] fn contravariant_region_ptr() { - let env = setup_env("contravariant_region_ptr", ""); + let env = setup_env("contravariant_region_ptr", EMPTY_SOURCE_STR); env.create_simple_region_hierarchy(); let t_rptr1 = env.t_rptr_scope(1); let t_rptr10 = env.t_rptr_scope(10); @@ -251,7 +311,7 @@ fn contravariant_region_ptr() { #[test] fn lub_bound_bound() { - let env = setup_env("lub_bound_bound", ""); + let env = setup_env("lub_bound_bound", EMPTY_SOURCE_STR); let t_rptr_bound1 = env.t_rptr_bound(1); let t_rptr_bound2 = env.t_rptr_bound(2); env.check_lub(env.t_fn([t_rptr_bound1], env.t_int()), @@ -261,7 +321,7 @@ fn lub_bound_bound() { #[test] fn lub_bound_free() { - let env = setup_env("lub_bound_free", ""); + let env = setup_env("lub_bound_free", EMPTY_SOURCE_STR); let t_rptr_bound1 = env.t_rptr_bound(1); let t_rptr_free1 = env.t_rptr_free(0, 1); env.check_lub(env.t_fn([t_rptr_bound1], env.t_int()), @@ -271,7 +331,7 @@ fn lub_bound_free() { #[test] fn lub_bound_static() { - let env = setup_env("lub_bound_static", ""); + let env = setup_env("lub_bound_static", EMPTY_SOURCE_STR); let t_rptr_bound1 = env.t_rptr_bound(1); let t_rptr_static = env.t_rptr_static(); env.check_lub(env.t_fn([t_rptr_bound1], env.t_int()), @@ -281,7 +341,7 @@ fn lub_bound_static() { #[test] fn lub_bound_bound_inverse_order() { - let env = setup_env("lub_bound_bound_inverse_order", ""); + let env = setup_env("lub_bound_bound_inverse_order", EMPTY_SOURCE_STR); let t_rptr_bound1 = env.t_rptr_bound(1); let t_rptr_bound2 = env.t_rptr_bound(2); env.check_lub(env.t_fn([t_rptr_bound1, t_rptr_bound2], t_rptr_bound1), @@ -291,7 +351,7 @@ fn lub_bound_bound_inverse_order() { #[test] fn lub_free_free() { - let env = setup_env("lub_free_free", ""); + let env = setup_env("lub_free_free", EMPTY_SOURCE_STR); let t_rptr_free1 = env.t_rptr_free(0, 1); let t_rptr_free2 = env.t_rptr_free(0, 2); let t_rptr_static = env.t_rptr_static(); @@ -302,10 +362,50 @@ fn lub_free_free() { #[test] fn lub_returning_scope() { - let env = setup_env("lub_returning_scope", ""); + let env = setup_env("lub_returning_scope", EMPTY_SOURCE_STR); let t_rptr_scope10 = env.t_rptr_scope(10); let t_rptr_scope11 = env.t_rptr_scope(11); env.check_no_lub(env.t_fn([], t_rptr_scope10), env.t_fn([], t_rptr_scope11)); } +#[test] +fn glb_free_free_with_common_scope() { + let env = setup_env("glb_free_free", EMPTY_SOURCE_STR); + let t_rptr_free1 = env.t_rptr_free(0, 1); + let t_rptr_free2 = env.t_rptr_free(0, 2); + let t_rptr_scope = env.t_rptr_scope(0); + env.check_glb(env.t_fn([t_rptr_free1], env.t_int()), + env.t_fn([t_rptr_free2], env.t_int()), + env.t_fn([t_rptr_scope], env.t_int())); +} + +#[test] +fn glb_bound_bound() { + let env = setup_env("glb_bound_bound", EMPTY_SOURCE_STR); + let t_rptr_bound1 = env.t_rptr_bound(1); + let t_rptr_bound2 = env.t_rptr_bound(2); + env.check_glb(env.t_fn([t_rptr_bound1], env.t_int()), + env.t_fn([t_rptr_bound2], env.t_int()), + env.t_fn([t_rptr_bound1], env.t_int())); +} + +#[test] +fn glb_bound_free() { + let env = setup_env("glb_bound_free", EMPTY_SOURCE_STR); + let t_rptr_bound1 = env.t_rptr_bound(1); + let t_rptr_free1 = env.t_rptr_free(0, 1); + env.check_glb(env.t_fn([t_rptr_bound1], env.t_int()), + env.t_fn([t_rptr_free1], env.t_int()), + env.t_fn([t_rptr_bound1], env.t_int())); +} + +#[test] +fn glb_bound_static() { + let env = setup_env("glb_bound_static", EMPTY_SOURCE_STR); + let t_rptr_bound1 = env.t_rptr_bound(1); + let t_rptr_static = env.t_rptr_static(); + env.check_glb(env.t_fn([t_rptr_bound1], env.t_int()), + env.t_fn([t_rptr_static], env.t_int()), + env.t_fn([t_rptr_bound1], env.t_int())); +} diff --git a/src/librustc/util/ppaux.rs b/src/librustc/util/ppaux.rs index 069a9c2875d55..d094689af56fe 100644 --- a/src/librustc/util/ppaux.rs +++ b/src/librustc/util/ppaux.rs @@ -13,7 +13,8 @@ use middle::ty; use middle::ty::{arg, canon_mode}; use middle::ty::{bound_copy, bound_const, bound_owned, bound_send, bound_trait}; -use middle::ty::{bound_region, br_anon, br_named, br_self, br_cap_avoid}; +use middle::ty::{bound_region, br_anon, br_named, br_self, br_cap_avoid, + br_fresh}; use middle::ty::{ctxt, field, method}; use middle::ty::{mt, t, param_bound}; use middle::ty::{re_bound, re_free, re_scope, re_infer, re_static, Region}; @@ -90,6 +91,7 @@ fn explain_region_and_span(cx: ctxt, region: ty::Region) let prefix = match br { br_anon(idx) => fmt!("the anonymous lifetime #%u defined on", idx + 1), + br_fresh(_) => fmt!("an anonymous lifetime defined on"), _ => fmt!("the lifetime %s as defined on", bound_region_to_str(cx, br)) }; @@ -136,6 +138,7 @@ fn bound_region_to_str_adorned(cx: ctxt, prefix: ~str, br_named(id) => fmt!("%s%s%s", prefix, cx.sess.str_of(id), sep), br_self => fmt!("%sself%s", prefix, sep), br_anon(_) => prefix, + br_fresh(_) => prefix, br_cap_avoid(_, br) => bound_region_to_str_adorned(cx, prefix, *br, sep) } diff --git a/src/libsyntax/codemap.rs b/src/libsyntax/codemap.rs index 6481785e61af2..48632664786be 100644 --- a/src/libsyntax/codemap.rs +++ b/src/libsyntax/codemap.rs @@ -311,6 +311,10 @@ pub impl CodeMap { } pub fn span_to_str(&self, sp: span) -> ~str { + if self.files.len() == 0 && sp == ast_util::dummy_sp() { + return ~"no-location"; + } + let lo = self.lookup_char_pos_adj(sp.lo); let hi = self.lookup_char_pos_adj(sp.hi); return fmt!("%s:%u:%u: %u:%u", lo.filename, diff --git a/src/libsyntax/diagnostic.rs b/src/libsyntax/diagnostic.rs index e42bb00c2129d..bcc9403b95b6e 100644 --- a/src/libsyntax/diagnostic.rs +++ b/src/libsyntax/diagnostic.rs @@ -11,8 +11,9 @@ use std::term; use io::WriterUtil; use codemap::span; +use dvec::DVec; -export emitter, emit; +export emitter, collect, emit; export level, fatal, error, warning, note; export span_handler, handler, mk_span_handler, mk_handler; export codemap_span_handler, codemap_handler; @@ -136,7 +137,7 @@ fn mk_handler(emitter: Option) -> handler { Some(e) => e, None => { let f = fn@(cmsp: Option<(@codemap::CodeMap, span)>, - msg: &str, t: level) { + msg: &str, t: level) { emit(cmsp, msg, t); }; f @@ -199,6 +200,12 @@ fn print_diagnostic(topic: ~str, lvl: level, msg: &str) { io::stderr().write_str(fmt!(" %s\n", msg)); } +fn collect(messages: @DVec<~str>) + -> fn@(Option<(@codemap::CodeMap, span)>, &str, level) +{ + |_o, msg: &str, _l| { messages.push(msg.to_str()); } +} + fn emit(cmsp: Option<(@codemap::CodeMap, span)>, msg: &str, lvl: level) { match cmsp { Some((cm, sp)) => {