diff --git a/Cargo.lock b/Cargo.lock
index 316724ca65158..3207f13683efc 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1975,6 +1975,7 @@ dependencies = [
  "log 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)",
  "parking_lot 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)",
  "polonius-engine 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "rustc-hash 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "rustc-rayon 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "rustc-rayon-core 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "rustc_apfloat 0.0.0",
diff --git a/src/librustc/Cargo.toml b/src/librustc/Cargo.toml
index 3316735de663e..838971f9efc28 100644
--- a/src/librustc/Cargo.toml
+++ b/src/librustc/Cargo.toml
@@ -20,6 +20,7 @@ log = { version = "0.4", features = ["release_max_level_info", "std"] }
 polonius-engine = "0.5.0"
 rustc-rayon = "0.1.1"
 rustc-rayon-core = "0.1.1"
+rustc-hash = "1.0.1"
 rustc_apfloat = { path = "../librustc_apfloat" }
 rustc_target = { path = "../librustc_target" }
 rustc_data_structures = { path = "../librustc_data_structures" }
diff --git a/src/librustc/dep_graph/dep_node.rs b/src/librustc/dep_graph/dep_node.rs
index a20d04972fd75..1dcd4ceb2bb34 100644
--- a/src/librustc/dep_graph/dep_node.rs
+++ b/src/librustc/dep_graph/dep_node.rs
@@ -162,7 +162,9 @@ macro_rules! define_dep_nodes {
                 }
             }
 
-            #[inline]
+            // FIXME: Make `is_anon`, `is_input`, `is_eval_always` and `has_params` properties
+            // of queries
+            #[inline(always)]
             pub fn is_anon(&self) -> bool {
                 match *self {
                     $(
@@ -171,8 +173,8 @@ macro_rules! define_dep_nodes {
                 }
             }
 
-            #[inline]
-            pub fn is_input(&self) -> bool {
+            #[inline(always)]
+            pub fn is_input_inlined(&self) -> bool {
                 match *self {
                     $(
                         DepKind :: $variant => { contains_input_attr!($($attr),*) }
@@ -180,7 +182,11 @@ macro_rules! define_dep_nodes {
                 }
             }
 
-            #[inline]
+            pub fn is_input(&self) -> bool {
+                self.is_input_inlined()
+            }
+
+            #[inline(always)]
             pub fn is_eval_always(&self) -> bool {
                 match *self {
                     $(
@@ -190,8 +196,8 @@ macro_rules! define_dep_nodes {
             }
 
             #[allow(unreachable_code)]
-            #[inline]
-            pub fn has_params(&self) -> bool {
+            #[inline(always)]
+            pub fn has_params_inlined(&self) -> bool {
                 match *self {
                     $(
                         DepKind :: $variant => {
@@ -212,6 +218,10 @@ macro_rules! define_dep_nodes {
                     )*
                 }
             }
+
+            pub fn has_params(&self) -> bool {
+                self.has_params_inlined()
+            }
         }
 
         pub enum DepConstructor<$tcx> {
@@ -230,7 +240,8 @@ macro_rules! define_dep_nodes {
 
         impl DepNode {
             #[allow(unreachable_code, non_snake_case)]
-            pub fn new<'a, 'gcx, 'tcx>(tcx: TyCtxt<'a, 'gcx, 'tcx>,
+            #[inline(always)]
+            pub fn new_inlined<'a, 'gcx, 'tcx>(tcx: TyCtxt<'a, 'gcx, 'tcx>,
                                        dep: DepConstructor<'gcx>)
                                        -> DepNode
                 where 'gcx: 'a + 'tcx,
@@ -299,7 +310,7 @@ macro_rules! define_dep_nodes {
             /// Construct a DepNode from the given DepKind and DefPathHash. This
             /// method will assert that the given DepKind actually requires a
             /// single DefId/DefPathHash parameter.
-            #[inline]
+            #[inline(always)]
             pub fn from_def_path_hash(kind: DepKind,
                                       def_path_hash: DefPathHash)
                                       -> DepNode {
@@ -313,9 +324,9 @@ macro_rules! define_dep_nodes {
             /// Create a new, parameterless DepNode. This method will assert
             /// that the DepNode corresponding to the given DepKind actually
             /// does not require any parameters.
-            #[inline]
+            #[inline(always)]
             pub fn new_no_params(kind: DepKind) -> DepNode {
-                assert!(!kind.has_params());
+                assert!(!kind.has_params_inlined());
                 DepNode {
                     kind,
                     hash: Fingerprint::ZERO,
@@ -418,14 +429,14 @@ impl fmt::Debug for DepNode {
 
 
 impl DefPathHash {
-    #[inline]
+    #[inline(always)]
     pub fn to_dep_node(self, kind: DepKind) -> DepNode {
         DepNode::from_def_path_hash(kind, self)
     }
 }
 
 impl DefId {
-    #[inline]
+    #[inline(always)]
     pub fn to_dep_node(self, tcx: TyCtxt<'_, '_, '_>, kind: DepKind) -> DepNode {
         DepNode::from_def_path_hash(kind, tcx.def_path_hash(self))
     }
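
The `*_inlined` variants above (`is_input_inlined`, `has_params_inlined`, `new_inlined`) follow an inline/outline split: the macro-generated match body is marked `#[inline(always)]` so hot call sites can constant-fold it, while a thin wrapper without the attribute keeps an ordinary outlined symbol for everyone else. A minimal sketch of the pattern, with a hypothetical `Kind` enum standing in for `DepKind`:

    #[derive(Copy, Clone)]
    pub enum Kind { Hir, Krate }

    impl Kind {
        // Hot path: the match collapses to a constant when the kind is
        // statically known at an inlined call site.
        #[inline(always)]
        pub fn is_input_inlined(self) -> bool {
            match self {
                Kind::Hir => true,
                Kind::Krate => false,
            }
        }

        // Outlined wrapper: callers that don't benefit from constant
        // folding go through this ordinary function, avoiding code bloat.
        pub fn is_input(self) -> bool {
            self.is_input_inlined()
        }
    }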
diff --git a/src/librustc/dep_graph/graph.rs b/src/librustc/dep_graph/graph.rs
index 4c94c993ab405..5db80bded38ae 100644
--- a/src/librustc/dep_graph/graph.rs
+++ b/src/librustc/dep_graph/graph.rs
@@ -13,7 +13,8 @@ use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
 use rustc_data_structures::fx::{FxHashMap, FxHashSet};
 use rustc_data_structures::indexed_vec::{Idx, IndexVec};
 use smallvec::SmallVec;
-use rustc_data_structures::sync::{Lrc, Lock};
+use rustc_data_structures::sync::{Lrc, Lock, AtomicU64, Ordering::Relaxed};
+use rustc_data_structures::vec::SmallVecExt;
 use std::env;
 use std::hash::Hash;
 use ty::{self, TyCtxt};
@@ -69,6 +70,8 @@ struct DepGraphData {
     /// current one anymore.
     current: Lock<CurrentDepGraph>,
 
+    current_atomic: CurrentDepGraphAtomic,
+
     /// The dep-graph from the previous compilation session. It contains all
     /// nodes and edges as well as all fingerprints of nodes that have them.
     previous: PreviousDepGraph,
@@ -98,11 +101,13 @@ impl DepGraph {
 
         let fingerprints = IndexVec::from_elem_n(Fingerprint::ZERO,
                                                  (prev_graph_node_count * 115) / 100);
+        let (current, current_atomic) = CurrentDepGraph::new();
         DepGraph {
             data: Some(Lrc::new(DepGraphData {
                 previous_work_products: prev_work_products,
                 dep_node_debug: Default::default(),
-                current: Lock::new(CurrentDepGraph::new()),
+                current: Lock::new(current),
+                current_atomic,
                 previous: prev_graph,
                 colors: Lock::new(DepNodeColorMap::new(prev_graph_node_count)),
                 loaded_from_cache: Default::default(),
@@ -202,204 +207,255 @@ impl DepGraph {
                                    arg: A,
                                    task: fn(C, A) -> R)
                                    -> (R, DepNodeIndex)
-        where C: DepGraphSafe + StableHashingContextProvider<'gcx>,
+        where C: DepGraphSafe + StableHashingContextProvider<'gcx> + Clone,
               R: HashStable<StableHashingContext<'gcx>>,
     {
-        self.with_task_impl(key, cx, arg, false, task,
-            |key| OpenTask::Regular(Lock::new(RegularOpenTask {
+        if let Some(ref data) = self.data {
+            let open_task = OpenTask::Regular(Lock::new(RegularOpenTask {
                 node: key,
                 reads: SmallVec::new(),
                 read_set: Default::default(),
-            })),
-            |data, key, task| data.borrow_mut().complete_task(key, task))
+            }));
+            let result = ty::tls::with_context(|icx| {
+                let icx = ty::tls::ImplicitCtxt {
+                    task: &open_task,
+                    ..icx.clone()
+                };
+
+                ty::tls::enter_context(&icx, |_| {
+                    task(cx.clone(), arg)
+                })
+            });
+            let dep_node_index = data.current.borrow_mut().complete_task(key, open_task);
+            self.finish_task_incr_on(data, key, cx, &result, dep_node_index);
+            (result, dep_node_index)
+        } else {
+            let result = task(cx.clone(), arg);
+            self.finish_task_incr_off(key, cx, &result);
+            (result, DepNodeIndex::INVALID)
+        }
     }
 
-    /// Creates a new dep-graph input with value `input`
-    pub fn input_task<'gcx, C, R>(&self,
-                                   key: DepNode,
-                                   cx: C,
-                                   input: R)
-                                   -> (R, DepNodeIndex)
-        where C: DepGraphSafe + StableHashingContextProvider<'gcx>,
-              R: HashStable<StableHashingContext<'gcx>>,
+    /// Execute something within an "eval-always" task, which is a task
+    /// that runs whenever anything changes.
+    // FIXME: Find a way to make F: DepGraphSafe
+    pub fn with_eval_always_task<'a, F, R>(
+        &self,
+        tcx: TyCtxt<'a, '_, '_>,
+        key: DepNode,
+        task: F,
+    ) -> (R, DepNodeIndex)
+        where F: FnOnce(&OpenTask) -> R,
+              R: HashStable<StableHashingContext<'a>>,
     {
-        fn identity_fn<C, A>(_: C, arg: A) -> A {
-            arg
+        if let Some(ref data) = self.data {
+            let open_task = OpenTask::EvalAlways { node: key };
+            let result = task(&open_task);
+            let dep_node_index = data.current.borrow_mut()
+                                             .complete_eval_always_task(key, open_task);
+            self.finish_task_incr_on(data, key, tcx, &result, dep_node_index);
+            (result, dep_node_index)
+        } else {
+            debug_assert!(!key.kind.fingerprint_needed_for_crate_hash());
+            (task(&OpenTask::Ignore), DepNodeIndex::INVALID)
         }
+    }
 
-        self.with_task_impl(key, cx, input, true, identity_fn,
-            |_| OpenTask::Ignore,
-            |data, key, _| data.borrow_mut().alloc_node(key, SmallVec::new()))
+    // FIXME: Merge with with_task?
+    #[inline]
+    pub fn with_query_task<'a, F, R>(
+        &self,
+        tcx: TyCtxt<'a, '_, '_>,
+        key: DepNode,
+        task: F,
+    ) -> (R, DepNodeIndex)
+        where F: FnOnce(&OpenTask) -> R,
+              R: HashStable<StableHashingContext<'a>>,
+    {
+        if let Some(ref data) = self.data {
+            let open_task = OpenTask::Regular(Lock::new(RegularOpenTask {
+                node: key,
+                reads: SmallVec::new(),
+                read_set: Default::default(),
+            }));
+            let result = task(&open_task);
+            // FIXME: Look at `complete_task` and do the same for the other functions
+            let dep_node_index = data.current.borrow_mut().complete_task(key, open_task);
+            self.finish_task_incr_on(data, key, tcx, &result, dep_node_index);
+            (result, dep_node_index)
+        } else {
+            debug_assert!(!key.kind.fingerprint_needed_for_crate_hash());
+            // Unlike with_task, we don't call finish_task_incr_off here
+            (task(&OpenTask::Ignore), DepNodeIndex::INVALID)
+        }
     }
 
-    fn with_task_impl<'gcx, C, A, R>(
+    /// Creates a new dep-graph input with value `input` and returns its index
+    pub fn input_dep_index<'gcx, R>(
         &self,
         key: DepNode,
+        cx: &StableHashingContext<'gcx>,
+        input: &R
+    ) -> DepNodeIndex
+        where R: HashStable<StableHashingContext<'gcx>>,
+    {
+        // This assumes that we don't have an ImplicitCtxt and thus have
+        // an implicit OpenTask::Ignore task
+        debug_assert!(ty::tls::with_opt(|tcx| tcx.is_none()));
+
+        if let Some(ref data) = self.data {
+            let dep_node_index = data.current.borrow_mut().alloc_node(key, SmallVec::new());
+            self.finish_task_incr_on(data, key, cx, input, dep_node_index);
+            dep_node_index
+        } else {
+            self.finish_task_incr_off(key, cx, input)
+        }
+    }
+
+    fn finish_task_incr_on<'gcx, C, R>(
+        &self,
+        data: &DepGraphData,
+        key: DepNode,
         cx: C,
-        arg: A,
-        no_tcx: bool,
-        task: fn(C, A) -> R,
-        create_task: fn(DepNode) -> OpenTask,
-        finish_task_and_alloc_depnode: fn(&Lock<CurrentDepGraph>,
-                                          DepNode,
-                                          OpenTask) -> DepNodeIndex
-    ) -> (R, DepNodeIndex)
+        result: &R,
+        dep_node_index: DepNodeIndex,
+    )
     where
         C: DepGraphSafe + StableHashingContextProvider<'gcx>,
         R: HashStable<StableHashingContext<'gcx>>,
     {
-        if let Some(ref data) = self.data {
-            let open_task = create_task(key);
+        // In incremental mode, hash the result of the task. We don't
+        // do anything with the hash yet, but we are computing it
+        // anyway so that
+        //  - we make sure that the infrastructure works and
+        //  - we can get an idea of the runtime cost.
+        let mut hcx = cx.get_stable_hashing_context();
+
+        if cfg!(debug_assertions) {
+            profq_msg(hcx.sess(), ProfileQueriesMsg::TaskBegin(key.clone()))
+        };
 
-            // In incremental mode, hash the result of the task. We don't
-            // do anything with the hash yet, but we are computing it
-            // anyway so that
-            //  - we make sure that the infrastructure works and
-            //  - we can get an idea of the runtime cost.
-            let mut hcx = cx.get_stable_hashing_context();
+        if cfg!(debug_assertions) {
+            profq_msg(hcx.sess(), ProfileQueriesMsg::TaskEnd)
+        };
 
-            if cfg!(debug_assertions) {
-                profq_msg(hcx.sess(), ProfileQueriesMsg::TaskBegin(key.clone()))
-            };
+        let mut stable_hasher = StableHasher::new();
+        result.hash_stable(&mut hcx, &mut stable_hasher);
 
-            let result = if no_tcx {
-                task(cx, arg)
-            } else {
-                ty::tls::with_context(|icx| {
-                    let icx = ty::tls::ImplicitCtxt {
-                        task: &open_task,
-                        ..icx.clone()
-                    };
-
-                    ty::tls::enter_context(&icx, |_| {
-                        task(cx, arg)
-                    })
-                })
-            };
+        let current_fingerprint = stable_hasher.finish();
 
-            if cfg!(debug_assertions) {
-                profq_msg(hcx.sess(), ProfileQueriesMsg::TaskEnd)
-            };
+        // Store the current fingerprint
+        {
+            let mut fingerprints = self.fingerprints.borrow_mut();
 
-            let dep_node_index = finish_task_and_alloc_depnode(&data.current, key, open_task);
+            if dep_node_index.index() >= fingerprints.len() {
+                fingerprints.resize(dep_node_index.index() + 1, Fingerprint::ZERO);
+            }
 
-            let mut stable_hasher = StableHasher::new();
-            result.hash_stable(&mut hcx, &mut stable_hasher);
+            debug_assert!(fingerprints[dep_node_index] == Fingerprint::ZERO,
+                            "DepGraph::with_task() - Duplicate fingerprint \
+                            insertion for {:?}", key);
+            fingerprints[dep_node_index] = current_fingerprint;
+        }
 
-            let current_fingerprint = stable_hasher.finish();
+        // Determine the color of the new DepNode.
+        if let Some(prev_index) = data.previous.node_to_index_opt(&key) {
+            let prev_fingerprint = data.previous.fingerprint_by_index(prev_index);
 
-            // Store the current fingerprint
-            {
-                let mut fingerprints = self.fingerprints.borrow_mut();
+            let color = if current_fingerprint == prev_fingerprint {
+                DepNodeColor::Green(dep_node_index)
+            } else {
+                DepNodeColor::Red
+            };
 
-                if dep_node_index.index() >= fingerprints.len() {
-                    fingerprints.resize(dep_node_index.index() + 1, Fingerprint::ZERO);
-                }
+            let mut colors = data.colors.borrow_mut();
+            debug_assert!(colors.get(prev_index).is_none(),
+                            "DepGraph::with_task() - Duplicate DepNodeColor \
+                            insertion for {:?}", key);
 
-                debug_assert!(fingerprints[dep_node_index] == Fingerprint::ZERO,
-                              "DepGraph::with_task() - Duplicate fingerprint \
-                               insertion for {:?}", key);
-                fingerprints[dep_node_index] = current_fingerprint;
-            }
+            colors.insert(prev_index, color);
+        }
+    }
 
-            // Determine the color of the new DepNode.
-            if let Some(prev_index) = data.previous.node_to_index_opt(&key) {
-                let prev_fingerprint = data.previous.fingerprint_by_index(prev_index);
+    fn finish_task_incr_off<'gcx, C, R>(
+        &self,
+        key: DepNode,
+        cx: C,
+        result: &R,
+    ) -> DepNodeIndex
+    where
+        C: DepGraphSafe + StableHashingContextProvider<'gcx>,
+        R: HashStable<StableHashingContext<'gcx>>,
+    {
+        debug_assert!(self.data.is_none());
 
-                let color = if current_fingerprint == prev_fingerprint {
-                    DepNodeColor::Green(dep_node_index)
-                } else {
-                    DepNodeColor::Red
-                };
+        if key.kind.fingerprint_needed_for_crate_hash() {
+            let mut hcx = cx.get_stable_hashing_context();
+            let mut stable_hasher = StableHasher::new();
+            result.hash_stable(&mut hcx, &mut stable_hasher);
+            let fingerprint = stable_hasher.finish();
 
-                let mut colors = data.colors.borrow_mut();
-                debug_assert!(colors.get(prev_index).is_none(),
-                              "DepGraph::with_task() - Duplicate DepNodeColor \
-                               insertion for {:?}", key);
+            let mut fingerprints = self.fingerprints.borrow_mut();
+            let dep_node_index = DepNodeIndex::new(fingerprints.len());
+            fingerprints.push(fingerprint);
 
-                colors.insert(prev_index, color);
-            }
+            debug_assert!(fingerprints[dep_node_index] == fingerprint,
+                            "DepGraph::with_task() - Assigned fingerprint to \
+                            unexpected index for {:?}", key);
 
-            (result, dep_node_index)
+            dep_node_index
         } else {
-            if key.kind.fingerprint_needed_for_crate_hash() {
-                let mut hcx = cx.get_stable_hashing_context();
-                let result = task(cx, arg);
-                let mut stable_hasher = StableHasher::new();
-                result.hash_stable(&mut hcx, &mut stable_hasher);
-                let fingerprint = stable_hasher.finish();
-
-                let mut fingerprints = self.fingerprints.borrow_mut();
-                let dep_node_index = DepNodeIndex::new(fingerprints.len());
-                fingerprints.push(fingerprint);
-
-                debug_assert!(fingerprints[dep_node_index] == fingerprint,
-                              "DepGraph::with_task() - Assigned fingerprint to \
-                               unexpected index for {:?}", key);
-
-                (result, dep_node_index)
-            } else {
-                (task(cx, arg), DepNodeIndex::INVALID)
-            }
+            DepNodeIndex::INVALID
         }
     }
 
     /// Execute something within an "anonymous" task, that is, a task the
     /// DepNode of which is determined by the list of inputs it read from.
-    pub fn with_anon_task<OP,R>(&self, dep_kind: DepKind, op: OP) -> (R, DepNodeIndex)
-        where OP: FnOnce() -> R
+    pub fn with_anon_open_task<OP,R>(&self, dep_kind: DepKind, op: OP) -> (R, DepNodeIndex)
+        where OP: FnOnce(&OpenTask) -> R
     {
         if let Some(ref data) = self.data {
-            let (result, open_task) = ty::tls::with_context(|icx| {
-                let task = OpenTask::Anon(Lock::new(AnonOpenTask {
-                    reads: SmallVec::new(),
-                    read_set: Default::default(),
-                }));
-
-                let r = {
-                    let icx = ty::tls::ImplicitCtxt {
-                        task: &task,
-                        ..icx.clone()
-                    };
-
-                    ty::tls::enter_context(&icx, |_| {
-                        op()
-                    })
-                };
+            let task = OpenTask::Anon(Lock::new(AnonOpenTask {
+                reads: SmallVec::new(),
+                read_set: Default::default(),
+            }));
 
-                (r, task)
-            });
+            let result = op(&task);
             let dep_node_index = data.current
                                      .borrow_mut()
-                                     .pop_anon_task(dep_kind, open_task);
+                                     .complete_anon_task(dep_kind, task);
             (result, dep_node_index)
         } else {
-            (op(), DepNodeIndex::INVALID)
+            (op(&OpenTask::Ignore), DepNodeIndex::INVALID)
         }
     }
 
-    /// Execute something within an "eval-always" task which is a task
-    // that runs whenever anything changes.
-    pub fn with_eval_always_task<'gcx, C, A, R>(&self,
-                                   key: DepNode,
-                                   cx: C,
-                                   arg: A,
-                                   task: fn(C, A) -> R)
-                                   -> (R, DepNodeIndex)
-        where C: DepGraphSafe + StableHashingContextProvider<'gcx>,
-              R: HashStable<StableHashingContext<'gcx>>,
+    /// Execute something within an "anonymous" task, that is, a task the
+    /// DepNode of which is determined by the list of inputs it read from.
+    pub fn with_anon_task<OP,R>(&self, dep_kind: DepKind, op: OP) -> (R, DepNodeIndex)
+        where OP: FnOnce() -> R
     {
-        self.with_task_impl(key, cx, arg, false, task,
-            |key| OpenTask::EvalAlways { node: key },
-            |data, key, task| data.borrow_mut().complete_eval_always_task(key, task))
+        self.with_anon_open_task(dep_kind, |task| {
+            ty::tls::with_context(|icx| {
+                let icx = ty::tls::ImplicitCtxt {
+                    task,
+                    ..icx.clone()
+                };
+
+                ty::tls::enter_context(&icx, |_| {
+                    op()
+                })
+            })
+        })
     }
 
     #[inline]
     pub fn read(&self, v: DepNode) {
         if let Some(ref data) = self.data {
-            let mut current = data.current.borrow_mut();
+            let current = data.current.borrow_mut();
             if let Some(&dep_node_index) = current.node_to_node_index.get(&v) {
-                current.read_index(dep_node_index);
+                std::mem::drop(current);
+                data.current_atomic.read_index(&data.current, dep_node_index);
             } else {
                 bug!("DepKind {:?} should be pre-allocated but isn't.", v.kind)
             }
@@ -409,7 +465,7 @@ impl DepGraph {
     #[inline]
     pub fn read_index(&self, dep_node_index: DepNodeIndex) {
         if let Some(ref data) = self.data {
-            data.current.borrow_mut().read_index(dep_node_index);
+            data.current_atomic.read_index(&data.current, dep_node_index);
         }
     }
 
@@ -500,9 +556,10 @@ impl DepGraph {
     }
 
     pub fn edge_deduplication_data(&self) -> (u64, u64) {
-        let current_dep_graph = self.data.as_ref().unwrap().current.borrow();
+        let current_dep_graph = &self.data.as_ref().unwrap().current_atomic;
 
-        (current_dep_graph.total_read_count, current_dep_graph.total_duplicate_read_count)
+        (current_dep_graph.total_read_count.load(Relaxed),
+         current_dep_graph.total_duplicate_read_count.load(Relaxed))
     }
 
     pub fn serialize(&self) -> SerializedDepGraph {
@@ -756,7 +813,7 @@ impl DepGraph {
 
                 // Promote the previous diagnostics to the current session.
                 tcx.queries.on_disk_cache
-                   .store_diagnostics(dep_node_index, diagnostics.clone());
+                   .store_diagnostics(dep_node_index, Box::new(diagnostics.clone()));
 
                 for diagnostic in diagnostics {
                     DiagnosticBuilder::new_diagnostic(handle, diagnostic).emit();
@@ -887,6 +944,11 @@ pub enum WorkProductFileKind {
     BytecodeCompressed,
 }
 
+pub(super) struct CurrentDepGraphAtomic {
+    total_read_count: AtomicU64,
+    total_duplicate_read_count: AtomicU64,
+}
+
 pub(super) struct CurrentDepGraph {
     nodes: IndexVec<DepNodeIndex, DepNode>,
     edges: IndexVec<DepNodeIndex, SmallVec<[DepNodeIndex; 8]>>,
@@ -905,13 +967,10 @@ pub(super) struct CurrentDepGraph {
     // each anon node. The session-key is just a random number generated when
     // the DepGraph is created.
     anon_id_seed: Fingerprint,
-
-    total_read_count: u64,
-    total_duplicate_read_count: u64,
 }
 
 impl CurrentDepGraph {
-    fn new() -> CurrentDepGraph {
+    fn new() -> (CurrentDepGraph, CurrentDepGraphAtomic) {
         use std::time::{SystemTime, UNIX_EPOCH};
 
         let duration = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
@@ -934,17 +993,19 @@ impl CurrentDepGraph {
             None
         };
 
-        CurrentDepGraph {
+        (CurrentDepGraph {
             nodes: IndexVec::new(),
             edges: IndexVec::new(),
             node_to_node_index: Default::default(),
             anon_id_seed: stable_hasher.finish(),
             forbidden_edge,
-            total_read_count: 0,
-            total_duplicate_read_count: 0,
-        }
+        }, CurrentDepGraphAtomic {
+            total_read_count: AtomicU64::new(0),
+            total_duplicate_read_count: AtomicU64::new(0),
+        })
     }
 
+    #[inline(always)]
     fn complete_task(&mut self, key: DepNode, task: OpenTask) -> DepNodeIndex {
         if let OpenTask::Regular(task) = task {
             let RegularOpenTask {
@@ -982,7 +1043,8 @@ impl CurrentDepGraph {
         }
     }
 
-    fn pop_anon_task(&mut self, kind: DepKind, task: OpenTask) -> DepNodeIndex {
+    #[inline(always)]
+    fn complete_anon_task(&mut self, kind: DepKind, task: OpenTask) -> DepNodeIndex {
         if let OpenTask::Anon(task) = task {
             let AnonOpenTask {
                 read_set: _,
@@ -1021,6 +1083,7 @@ impl CurrentDepGraph {
         }
     }
 
+    #[inline(always)]
     fn complete_eval_always_task(&mut self, key: DepNode, task: OpenTask) -> DepNodeIndex {
         if let OpenTask::EvalAlways {
             node,
@@ -1033,20 +1096,39 @@ impl CurrentDepGraph {
         }
     }
 
-    fn read_index(&mut self, source: DepNodeIndex) {
+    fn alloc_node(&mut self,
+                  dep_node: DepNode,
+                  edges: SmallVec<[DepNodeIndex; 8]>)
+                  -> DepNodeIndex {
+        debug_assert_eq!(self.edges.len(), self.nodes.len());
+        debug_assert_eq!(self.node_to_node_index.len(), self.nodes.len());
+        debug_assert!(!self.node_to_node_index.contains_key(&dep_node));
+        let dep_node_index = DepNodeIndex::new(self.nodes.len());
+        self.nodes.push(dep_node);
+        self.node_to_node_index.insert(dep_node, dep_node_index);
+        self.edges.push(edges);
+        dep_node_index
+    }
+}
+
+impl CurrentDepGraphAtomic {
+    fn read_index(&self, lock: &Lock<CurrentDepGraph>, source: DepNodeIndex) {
         ty::tls::with_context_opt(|icx| {
             let icx = if let Some(icx) = icx { icx } else { return };
             match *icx.task {
                 OpenTask::Regular(ref task) => {
                     let mut task = task.lock();
-                    self.total_read_count += 1;
+                    self.total_read_count.fetch_add(1, Relaxed);
+                    // FIXME: Only use the read set once the SmallVec has spilled to the heap.
+                    // Use an array first and switch to the set afterwards?
                     if task.read_set.insert(source) {
-                        task.reads.push(source);
+                        task.reads.push_light(source);
 
                         if cfg!(debug_assertions) {
-                            if let Some(ref forbidden_edge) = self.forbidden_edge {
+                            let graph = lock.lock();
+                            if let Some(ref forbidden_edge) = graph.forbidden_edge {
                                 let target = &task.node;
-                                let source = self.nodes[source];
+                                let source = graph.nodes[source];
                                 if forbidden_edge.test(&source, &target) {
                                     bug!("forbidden edge {:?} -> {:?} created",
                                         source,
@@ -1055,13 +1137,13 @@ impl CurrentDepGraph {
                             }
                         }
                     } else {
-                        self.total_duplicate_read_count += 1;
+                        self.total_duplicate_read_count.fetch_add(1, Relaxed);
                     }
                 }
                 OpenTask::Anon(ref task) => {
                     let mut task = task.lock();
                     if task.read_set.insert(source) {
-                        task.reads.push(source);
+                        task.reads.push_light(source);
                     }
                 }
                 OpenTask::Ignore | OpenTask::EvalAlways { .. } => {
@@ -1070,20 +1152,6 @@ impl CurrentDepGraph {
             }
         })
     }
-
-    fn alloc_node(&mut self,
-                  dep_node: DepNode,
-                  edges: SmallVec<[DepNodeIndex; 8]>)
-                  -> DepNodeIndex {
-        debug_assert_eq!(self.edges.len(), self.nodes.len());
-        debug_assert_eq!(self.node_to_node_index.len(), self.nodes.len());
-        debug_assert!(!self.node_to_node_index.contains_key(&dep_node));
-        let dep_node_index = DepNodeIndex::new(self.nodes.len());
-        self.nodes.push(dep_node);
-        self.node_to_node_index.insert(dep_node, dep_node_index);
-        self.edges.push(edges);
-        dep_node_index
-    }
 }
 
 pub struct RegularOpenTask {
@@ -1092,6 +1160,53 @@ pub struct RegularOpenTask {
     read_set: FxHashSet<DepNodeIndex>,
 }
 
+// FIXME: Remove
+#[no_mangle]
+pub fn test1(a: &mut SmallVec<[DepNodeIndex; 8]>) {
+    a.push(DepNodeIndex::new(8));
+}
+
+// FIXME: Remove
+#[no_mangle]
+pub fn test3(a: &mut SmallVec<[DepNodeIndex; 8]>) {
+    a.push_light(DepNodeIndex::new(8));
+}
+
+#[no_mangle]
+pub fn test2(a: &mut DepGraph, dep_node_index: DepNodeIndex) {
+    a.read_index(dep_node_index)
+}
+use hir::def_id::DefId;
+use std::hash::Hasher;
+use rustc_hash::FxHasher;
+#[no_mangle]
+pub fn test4(a: DefId) -> u64 {
+    let mut hasher = FxHasher::default();
+    a.hash(&mut hasher);
+    hasher.finish()
+}
+use hir::def_id::CrateNum;
+#[no_mangle]
+pub fn test7(a: CrateNum) -> u64 {
+    let mut hasher = FxHasher::default();
+    a.hash(&mut hasher);
+    hasher.finish()
+}
+use syntax::ast::NodeId;
+#[no_mangle]
+pub fn test5(a: NodeId) -> u64 {
+    let mut hasher = FxHasher::default();
+    a.hash(&mut hasher);
+    hasher.finish()
+}
+use hir::HirId;
+#[no_mangle]
+pub fn test6(a: HirId) -> u64 {
+    let mut hasher = FxHasher::default();
+    a.hash(&mut hasher);
+    hasher.finish()
+}
+
 pub struct AnonOpenTask {
     reads: SmallVec<[DepNodeIndex; 8]>,
     read_set: FxHashSet<DepNodeIndex>,
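
The `CurrentDepGraphAtomic` split above exists so `read_index` can bump the two statistics counters without locking `CurrentDepGraph`. A minimal sketch of the idea (the `ReadStats` name is hypothetical):

    use std::sync::atomic::{AtomicU64, Ordering::Relaxed};

    struct ReadStats {
        total_read_count: AtomicU64,
        total_duplicate_read_count: AtomicU64,
    }

    impl ReadStats {
        // Relaxed ordering suffices: the counters feed a statistics report
        // and are never used for synchronization.
        fn record_read(&self, duplicate: bool) {
            self.total_read_count.fetch_add(1, Relaxed);
            if duplicate {
                self.total_duplicate_read_count.fetch_add(1, Relaxed);
            }
        }
    }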
diff --git a/src/librustc/hir/map/collector.rs b/src/librustc/hir/map/collector.rs
index 2917fd7457acf..ea1845530fe84 100644
--- a/src/librustc/hir/map/collector.rs
+++ b/src/librustc/hir/map/collector.rs
@@ -83,20 +83,20 @@ impl<'a, 'hir> NodeCollector<'a, 'hir> {
                 body_ids: _,
             } = *krate;
 
-            root_mod_sig_dep_index = dep_graph.input_task(
+            root_mod_sig_dep_index = dep_graph.input_dep_index(
                 root_mod_def_path_hash.to_dep_node(DepKind::Hir),
                 &hcx,
-                HirItemLike { item_like: (module, attrs, span), hash_bodies: false },
-            ).1;
-            root_mod_full_dep_index = dep_graph.input_task(
+                &HirItemLike { item_like: (module, attrs, span), hash_bodies: false },
+            );
+            root_mod_full_dep_index = dep_graph.input_dep_index(
                 root_mod_def_path_hash.to_dep_node(DepKind::HirBody),
                 &hcx,
-                HirItemLike { item_like: (module, attrs, span), hash_bodies: true },
-            ).1;
+                &HirItemLike { item_like: (module, attrs, span), hash_bodies: true },
+            );
         }
 
         {
-            dep_graph.input_task(
+            dep_graph.input_dep_index(
                 DepNode::new_no_params(DepKind::AllLocalTraitImpls),
                 &hcx,
                 &krate.trait_impls,
@@ -169,11 +169,11 @@ impl<'a, 'hir> NodeCollector<'a, 'hir> {
 
         source_file_names.sort_unstable();
 
-        let (_, crate_dep_node_index) = self
+        let crate_dep_node_index = self
             .dep_graph
-            .input_task(DepNode::new_no_params(DepKind::Krate),
+            .input_dep_index(DepNode::new_no_params(DepKind::Krate),
                        &self.hcx,
-                       (((node_hashes, upstream_crates), source_file_names),
+                       &(((node_hashes, upstream_crates), source_file_names),
                         (commandline_args_hash,
                          crate_disambiguator.to_fingerprint())));
 
@@ -261,17 +261,17 @@ impl<'a, 'hir> NodeCollector<'a, 'hir> {
 
         let def_path_hash = self.definitions.def_path_hash(dep_node_owner);
 
-        self.current_signature_dep_index = self.dep_graph.input_task(
+        self.current_signature_dep_index = self.dep_graph.input_dep_index(
             def_path_hash.to_dep_node(DepKind::Hir),
             &self.hcx,
-            HirItemLike { item_like, hash_bodies: false },
-        ).1;
+            &HirItemLike { item_like, hash_bodies: false },
+        );
 
-        self.current_full_dep_index = self.dep_graph.input_task(
+        self.current_full_dep_index = self.dep_graph.input_dep_index(
             def_path_hash.to_dep_node(DepKind::HirBody),
             &self.hcx,
-            HirItemLike { item_like, hash_bodies: true },
-        ).1;
+            &HirItemLike { item_like, hash_bodies: true },
+        );
 
         self.hir_body_nodes.push((def_path_hash, self.current_full_dep_index));
 
diff --git a/src/librustc/hir/map/mod.rs b/src/librustc/hir/map/mod.rs
index 4ac07d78a2613..236066e22ab4d 100644
--- a/src/librustc/hir/map/mod.rs
+++ b/src/librustc/hir/map/mod.rs
@@ -159,6 +159,10 @@ impl Forest {
         self.dep_graph.read(DepNode::new_no_params(DepKind::Krate));
         &self.krate
     }
+
+    pub fn untracked_krate<'hir>(&'hir self) -> &'hir Crate {
+        &self.krate
+    }
 }
 
 /// Represents a mapping from Node IDs to AST elements and their parent
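
`untracked_krate` deliberately skips the `dep_graph.read` that `krate()` performs, so callers such as `create_stable_hashing_context` (see the `ty/context.rs` hunk below) stop recording a dependency on the whole crate. A minimal sketch of the tracked/untracked pair, with stand-in types:

    struct DepGraph;
    impl DepGraph {
        fn read(&self, _node: &'static str) { /* record the read edge */ }
    }

    struct Forest {
        dep_graph: DepGraph,
        krate: String, // stand-in for hir::Crate
    }

    impl Forest {
        // Tracked access: registers a read of the Krate dep node.
        fn krate(&self) -> &String {
            self.dep_graph.read("Krate");
            &self.krate
        }

        // Untracked access: for callers that must not record a dependency.
        fn untracked_krate(&self) -> &String {
            &self.krate
        }
    }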
diff --git a/src/librustc/ich/hcx.rs b/src/librustc/ich/hcx.rs
index 7c623a1874e7b..33def778cf4a0 100644
--- a/src/librustc/ich/hcx.rs
+++ b/src/librustc/ich/hcx.rs
@@ -86,6 +86,7 @@ impl<'a> StableHashingContext<'a> {
     // The `krate` here is only used for mapping BodyIds to Bodies.
     // Don't use it for anything else or you'll run the risk of
     // leaking data out of the tracking system.
+    #[inline]
     pub fn new(sess: &'a Session,
                krate: &'a hir::Crate,
                definitions: &'a Definitions,
diff --git a/src/librustc/lib.rs b/src/librustc/lib.rs
index ddb0c5bf22ab6..5da084f82054c 100644
--- a/src/librustc/lib.rs
+++ b/src/librustc/lib.rs
@@ -59,7 +59,9 @@
 #![feature(slice_patterns)]
 #![feature(slice_sort_by_cached_key)]
 #![feature(specialization)]
+#![feature(stmt_expr_attributes)]
 #![feature(unboxed_closures)]
+#![feature(thread_local)]
 #![feature(trace_macros)]
 #![feature(trusted_len)]
 #![feature(vec_remove_item)]
@@ -69,12 +71,16 @@
 #![feature(in_band_lifetimes)]
 #![feature(crate_visibility_modifier)]
 #![feature(transpose_result)]
+#![feature(arbitrary_self_types)]
+#![feature(hash_raw_entry)]
+#![feature(maybe_uninit)]
 
 #![recursion_limit="512"]
 
 #![warn(elided_lifetimes_in_paths)]
 
 extern crate arena;
+extern crate rustc_hash;
 #[macro_use] extern crate bitflags;
 extern crate core;
 extern crate fmt_macros;
diff --git a/src/librustc/session/mod.rs b/src/librustc/session/mod.rs
index 293cd0c7c546d..f3bb9785d873b 100644
--- a/src/librustc/session/mod.rs
+++ b/src/librustc/session/mod.rs
@@ -26,7 +26,10 @@ use util::common::{duration_to_secs_str, ErrorReported};
 use util::common::ProfileQueriesMsg;
 
 use rustc_data_structures::base_n;
-use rustc_data_structures::sync::{self, Lrc, Lock, LockCell, OneThread, Once, RwLock};
+use rustc_data_structures::sync::{
+    self, Lrc, Lock, OneThread, Once, RwLock, AtomicU64, AtomicUsize, AtomicBool, Ordering,
+    Ordering::SeqCst,
+};
 
 use errors::{self, DiagnosticBuilder, DiagnosticId, Applicability};
 use errors::emitter::{Emitter, EmitterWriter};
@@ -51,7 +54,6 @@ use std::io::Write;
 use std::path::{Path, PathBuf};
 use std::time::Duration;
 use std::sync::mpsc;
-use std::sync::atomic::{AtomicUsize, Ordering};
 
 mod code_stats;
 pub mod config;
@@ -128,6 +130,9 @@ pub struct Session {
     /// Used by -Z profile-queries in util::common
     pub profile_channel: Lock<Option<mpsc::Sender<ProfileQueriesMsg>>>,
 
+    /// Whether -Z self-profile is active
+    pub self_profiling_active: bool,
+
     /// Used by -Z self-profile
     pub self_profiling: Lock<SelfProfiler>,
 
@@ -142,15 +147,15 @@ pub struct Session {
     /// If -zfuel=crate=n is specified, Some(crate).
     optimization_fuel_crate: Option<String>,
     /// If -zfuel=crate=n is specified, initially set to n. Otherwise 0.
-    optimization_fuel_limit: LockCell<u64>,
+    optimization_fuel_limit: AtomicU64,
     /// We're rejecting all further optimizations.
-    out_of_fuel: LockCell<bool>,
+    out_of_fuel: AtomicBool,
 
     // The next two are public because the driver needs to read them.
     /// If -zprint-fuel=crate, Some(crate).
     pub print_fuel_crate: Option<String>,
     /// Always set to zero and incremented so that we can print fuel expended by a crate.
-    pub print_fuel: LockCell<u64>,
+    pub print_fuel: AtomicU64,
 
     /// Loaded up early on in the initialization of this `Session` to avoid
     /// false positives about a job server in our environment.
@@ -825,10 +830,17 @@ impl Session {
         }
     }
 
+    #[inline(never)]
+    #[cold]
+    fn profiler_active<F: FnOnce(&mut SelfProfiler) -> ()>(&self, f: F) {
+        let mut profiler = self.self_profiling.borrow_mut();
+        f(&mut profiler);
+    }
+
+    #[inline(always)]
     pub fn profiler<F: FnOnce(&mut SelfProfiler) -> ()>(&self, f: F) {
-        if self.opts.debugging_opts.self_profile || self.opts.debugging_opts.profile_json {
-            let mut profiler = self.self_profiling.borrow_mut();
-            f(&mut profiler);
+        if unlikely!(self.self_profiling_active) {
+            self.profiler_active(f)
         }
     }
 
@@ -859,32 +871,43 @@ impl Session {
                  self.perf_stats.normalize_projection_ty.load(Ordering::Relaxed));
     }
 
-    /// We want to know if we're allowed to do an optimization for crate foo from -z fuel=foo=n.
-    /// This expends fuel if applicable, and records fuel if applicable.
-    pub fn consider_optimizing<T: Fn() -> String>(&self, crate_name: &str, msg: T) -> bool {
+    #[inline(never)]
+    #[cold]
+    pub fn consider_optimizing_cold<T: Fn() -> String>(&self, crate_name: &str, msg: T) -> bool {
         let mut ret = true;
         if let Some(ref c) = self.optimization_fuel_crate {
             if c == crate_name {
                 assert_eq!(self.query_threads(), 1);
-                let fuel = self.optimization_fuel_limit.get();
+                let fuel = self.optimization_fuel_limit.load(SeqCst);
                 ret = fuel != 0;
-                if fuel == 0 && !self.out_of_fuel.get() {
+                if fuel == 0 && !self.out_of_fuel.load(SeqCst) {
                     eprintln!("optimization-fuel-exhausted: {}", msg());
-                    self.out_of_fuel.set(true);
+                    self.out_of_fuel.store(true, SeqCst);
                 } else if fuel > 0 {
-                    self.optimization_fuel_limit.set(fuel - 1);
+                    self.optimization_fuel_limit.store(fuel - 1, SeqCst);
                 }
             }
         }
         if let Some(ref c) = self.print_fuel_crate {
             if c == crate_name {
                 assert_eq!(self.query_threads(), 1);
-                self.print_fuel.set(self.print_fuel.get() + 1);
+                self.print_fuel.store(self.print_fuel.load(SeqCst) + 1, SeqCst);
             }
         }
         ret
     }
 
+    /// We want to know if we're allowed to do an optimization for crate foo from -z fuel=foo=n.
+    /// This expends fuel if applicable, and records fuel if applicable.
+    #[inline(always)]
+    pub fn consider_optimizing<T: Fn() -> String>(&self, crate_name: &str, msg: T) -> bool {
+        if likely!(self.optimization_fuel_crate.is_none() && self.print_fuel_crate.is_none()) {
+            true
+        } else {
+            self.consider_optimizing_cold(crate_name, msg)
+        }
+    }
+
     /// Returns the number of query threads that should be used for this
     /// compilation
     pub fn query_threads_from_opts(opts: &config::Options) -> usize {
@@ -1121,9 +1144,9 @@ pub fn build_session_(
 
     let optimization_fuel_crate = sopts.debugging_opts.fuel.as_ref().map(|i| i.0.clone());
     let optimization_fuel_limit =
-        LockCell::new(sopts.debugging_opts.fuel.as_ref().map(|i| i.1).unwrap_or(0));
+        AtomicU64::new(sopts.debugging_opts.fuel.as_ref().map(|i| i.1).unwrap_or(0));
     let print_fuel_crate = sopts.debugging_opts.print_fuel.clone();
-    let print_fuel = LockCell::new(0);
+    let print_fuel = AtomicU64::new(0);
 
     let working_dir = env::current_dir().unwrap_or_else(|e|
         p_s.span_diagnostic
@@ -1138,6 +1161,9 @@ pub fn build_session_(
         CguReuseTracker::new_disabled()
     };
 
+    let self_profiling_active = sopts.debugging_opts.self_profile ||
+                                sopts.debugging_opts.profile_json;
+
     let sess = Session {
         target: target_cfg,
         host,
@@ -1168,6 +1194,7 @@ pub fn build_session_(
         imported_macro_spans: OneThread::new(RefCell::new(FxHashMap::default())),
         incr_comp_session: OneThread::new(RefCell::new(IncrCompSession::NotInitialized)),
         cgu_reuse_tracker,
+        self_profiling_active,
         self_profiling: Lock::new(SelfProfiler::new()),
         profile_channel: Lock::new(None),
         perf_stats: PerfStats {
@@ -1182,7 +1209,7 @@ pub fn build_session_(
         optimization_fuel_limit,
         print_fuel_crate,
         print_fuel,
-        out_of_fuel: LockCell::new(false),
+        out_of_fuel: AtomicBool::new(false),
         // Note that this is unsafe because it may misinterpret file descriptors
         // on Unix as jobserver file descriptors. We hopefully execute this near
         // the beginning of the process though to ensure we don't get false
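
`profiler` and `consider_optimizing` above share one hot/cold pattern: the common case is a single branch in an `#[inline(always)]` function, and the rare case lives in a `#[cold]` `#[inline(never)]` sibling so callers stay small. A minimal sketch (a plain `if` stands in for the rustc-internal `likely!`/`unlikely!` hint macros, and `Mutex` for `Lock`):

    use std::sync::Mutex;

    struct Profiler; // stand-in for SelfProfiler

    struct Sess {
        profiling_active: bool,
        profiler: Mutex<Profiler>,
    }

    impl Sess {
        // Cold path: only reached when -Z self-profile is enabled.
        #[cold]
        #[inline(never)]
        fn profiler_active<F: FnOnce(&mut Profiler)>(&self, f: F) {
            let mut p = self.profiler.lock().unwrap();
            f(&mut *p);
        }

        // Hot path: compiles down to one predictable branch.
        #[inline(always)]
        fn profiler<F: FnOnce(&mut Profiler)>(&self, f: F) {
            if self.profiling_active {
                self.profiler_active(f);
            }
        }
    }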
diff --git a/src/librustc/ty/context.rs b/src/librustc/ty/context.rs
index bd8b3e678d851..fd743c0cae06c 100644
--- a/src/librustc/ty/context.rs
+++ b/src/librustc/ty/context.rs
@@ -1331,8 +1331,9 @@ impl<'a, 'gcx, 'tcx> TyCtxt<'a, 'gcx, 'tcx> {
         self.cstore.crate_data_as_rc_any(cnum)
     }
 
+    #[inline(always)]
     pub fn create_stable_hashing_context(self) -> StableHashingContext<'a> {
-        let krate = self.dep_graph.with_ignore(|| self.gcx.hir.krate());
+        let krate = self.gcx.hir.forest.untracked_krate();
 
         StableHashingContext::new(self.sess,
                                   krate,
@@ -1349,7 +1350,7 @@ impl<'a, 'gcx, 'tcx> TyCtxt<'a, 'gcx, 'tcx> {
         // We cannot use the query versions of crates() and crate_hash(), since
         // those would need the DepNodes that we are allocating here.
         for cnum in self.cstore.crates_untracked() {
-            let dep_node = DepNode::new(self, DepConstructor::CrateMetadata(cnum));
+            let dep_node = DepNode::new_inlined(self, DepConstructor::CrateMetadata(cnum));
             let crate_hash = self.cstore.crate_hash_untracked(cnum);
             self.dep_graph.with_task(dep_node,
                                      self,
@@ -1622,7 +1623,8 @@ impl<'gcx: 'tcx, 'tcx> GlobalCtxt<'gcx> {
         ty::tls::with_related_context(tcx.global_tcx(), |icx| {
             let new_icx = ty::tls::ImplicitCtxt {
                 tcx,
-                query: icx.query.clone(),
+                query: icx.query,
+                diagnostics: icx.diagnostics,
                 layout_depth: icx.layout_depth,
                 task: icx.task,
             };
@@ -1871,7 +1873,7 @@ pub mod tls {
     use ty::query;
     use errors::{Diagnostic, TRACK_DIAGNOSTICS};
     use rustc_data_structures::OnDrop;
-    use rustc_data_structures::sync::{self, Lrc, Lock};
+    use rustc_data_structures::sync::{self, Lock, LrcRef};
     use dep_graph::OpenTask;
 
     #[cfg(not(parallel_queries))]
@@ -1893,7 +1895,11 @@ pub mod tls {
 
         /// The current query job, if any. This is updated by start_job in
         /// ty::query::plumbing when executing a query
-        pub query: Option<Lrc<query::QueryJob<'gcx>>>,
+        pub query: Option<LrcRef<'a, query::QueryJob<'gcx>>>,
+
+        /// Where to store diagnostics for the current query job, if any.
+        /// This is updated by start_job in ty::query::plumbing when executing a query
+        pub diagnostics: Option<&'a Lock<Option<Box<Vec<Diagnostic>>>>>,
 
         /// Used to prevent layout from recursing too deeply.
         pub layout_depth: usize,
@@ -1907,6 +1913,7 @@ pub mod tls {
     /// to `value` during the call to `f`. It is restored to its previous value after.
     /// This is used to set the pointer to the new ImplicitCtxt.
     #[cfg(parallel_queries)]
+    #[inline]
     fn set_tlv<F: FnOnce() -> R, R>(value: usize, f: F) -> R {
         rayon_core::tlv::with(value, f)
     }
@@ -1914,29 +1921,41 @@ pub mod tls {
     /// Gets Rayon's thread local variable which is preserved for Rayon jobs.
     /// This is used to get the pointer to the current ImplicitCtxt.
     #[cfg(parallel_queries)]
+    #[inline]
     fn get_tlv() -> usize {
         rayon_core::tlv::get()
     }
 
     /// A thread local variable which stores a pointer to the current ImplicitCtxt
     #[cfg(not(parallel_queries))]
-    thread_local!(static TLV: Cell<usize> = Cell::new(0));
+    // Accessing a `thread_local` static from another crate is bugged, so we
+    // provide two accessors, `set_raw_tlv` and `get_tlv`, which deliberately
+    // lack an inline attribute to work around that
+    #[thread_local]
+    static TLV: Cell<usize> = Cell::new(0);
+
+    /// This is used to set the pointer to the current ImplicitCtxt.
+    #[cfg(not(parallel_queries))]
+    fn set_raw_tlv(value: usize) {
+        TLV.set(value)
+    }
 
     /// Sets TLV to `value` during the call to `f`.
     /// It is restored to its previous value after.
     /// This is used to set the pointer to the new ImplicitCtxt.
     #[cfg(not(parallel_queries))]
+    #[inline]
     fn set_tlv<F: FnOnce() -> R, R>(value: usize, f: F) -> R {
         let old = get_tlv();
-        let _reset = OnDrop(move || TLV.with(|tlv| tlv.set(old)));
-        TLV.with(|tlv| tlv.set(value));
+        let _reset = OnDrop(move || set_raw_tlv(old));
+        set_raw_tlv(value);
         f()
     }
 
     /// This is used to get the pointer to the current ImplicitCtxt.
     #[cfg(not(parallel_queries))]
     fn get_tlv() -> usize {
-        TLV.with(|tlv| tlv.get())
+        TLV.get()
     }
 
     /// This is a callback from libsyntax as it cannot access the implicit state
@@ -1957,8 +1976,12 @@ pub mod tls {
     fn track_diagnostic(diagnostic: &Diagnostic) {
         with_context_opt(|icx| {
             if let Some(icx) = icx {
-                if let Some(ref query) = icx.query {
-                    query.diagnostics.lock().push(diagnostic.clone());
+                if let Some(ref diagnostics) = icx.diagnostics {
+                    let mut diagnostics = diagnostics.lock();
+                    if diagnostics.is_none() {
+                        *diagnostics = Some(Box::new(Vec::new()));
+                    }
+                    diagnostics.as_mut().unwrap().push(diagnostic.clone());
                 }
             }
         })
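
The lazy `Option<Box<Vec<Diagnostic>>>` above means the common case, a query that emits no diagnostics, never allocates, and the slot behind the lock stays a single word. A minimal sketch, with `Mutex` in place of rustc's `Lock` and `String` in place of `Diagnostic`:

    use std::sync::Mutex;

    fn push_diagnostic(slot: &Mutex<Option<Box<Vec<String>>>>, d: String) {
        let mut slot = slot.lock().unwrap();
        if slot.is_none() {
            // First diagnostic for this job: allocate the storage now.
            *slot = Some(Box::new(Vec::new()));
        }
        slot.as_mut().unwrap().push(d);
    }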
@@ -1990,6 +2013,7 @@ pub mod tls {
     }
 
     /// Sets `context` as the new current ImplicitCtxt for the duration of the function `f`
+    #[inline]
     pub fn enter_context<'a, 'gcx: 'tcx, 'tcx, F, R>(context: &ImplicitCtxt<'a, 'gcx, 'tcx>,
                                                      f: F) -> R
         where F: FnOnce(&ImplicitCtxt<'a, 'gcx, 'tcx>) -> R
@@ -2023,6 +2047,7 @@ pub mod tls {
             let icx = ImplicitCtxt {
                 tcx,
                 query: None,
+                diagnostics: None,
                 layout_depth: 0,
                 task: &OpenTask::Ignore,
             };
@@ -2051,6 +2076,7 @@ pub mod tls {
         };
         let icx = ImplicitCtxt {
             query: None,
+            diagnostics: None,
             tcx,
             layout_depth: 0,
             task: &OpenTask::Ignore,
@@ -2059,6 +2085,7 @@ pub mod tls {
     }
 
     /// Allows access to the current ImplicitCtxt in a closure if one is available
+    #[inline]
     pub fn with_context_opt<F, R>(f: F) -> R
         where F: for<'a, 'gcx, 'tcx> FnOnce(Option<&ImplicitCtxt<'a, 'gcx, 'tcx>>) -> R
     {
@@ -2076,6 +2103,7 @@ pub mod tls {
 
     /// Allows access to the current ImplicitCtxt.
     /// Panics if there is no ImplicitCtxt available
+    #[inline]
     pub fn with_context<F, R>(f: F) -> R
         where F: for<'a, 'gcx, 'tcx> FnOnce(&ImplicitCtxt<'a, 'gcx, 'tcx>) -> R
     {
@@ -2087,6 +2115,7 @@ pub mod tls {
     /// with the same 'gcx lifetime as the TyCtxt passed in.
     /// This will panic if you pass it a TyCtxt which has a different global interner from
     /// the current ImplicitCtxt's tcx field.
+    #[inline]
     pub fn with_related_context<'a, 'gcx, 'tcx1, F, R>(tcx: TyCtxt<'a, 'gcx, 'tcx1>, f: F) -> R
         where F: for<'b, 'tcx2> FnOnce(&ImplicitCtxt<'b, 'gcx, 'tcx2>) -> R
     {
@@ -2105,6 +2134,7 @@ pub mod tls {
     /// is given an ImplicitCtxt with the same 'tcx and 'gcx lifetimes as the TyCtxt passed in.
     /// This will panic if you pass it a TyCtxt which has a different global interner or
     /// a different local interner from the current ImplicitCtxt's tcx field.
+    #[inline]
     pub fn with_fully_related_context<'a, 'gcx, 'tcx, F, R>(tcx: TyCtxt<'a, 'gcx, 'tcx>, f: F) -> R
         where F: for<'b> FnOnce(&ImplicitCtxt<'b, 'gcx, 'tcx>) -> R
     {
@@ -2122,6 +2152,7 @@ pub mod tls {
 
     /// Allows access to the TyCtxt in the current ImplicitCtxt.
     /// Panics if there is no ImplicitCtxt available
+    #[inline]
     pub fn with<F, R>(f: F) -> R
         where F: for<'a, 'gcx, 'tcx> FnOnce(TyCtxt<'a, 'gcx, 'tcx>) -> R
     {
@@ -2130,6 +2161,7 @@ pub mod tls {
 
     /// Allows access to the TyCtxt in the current ImplicitCtxt.
     /// The closure is passed None if there is no ImplicitCtxt available
+    #[inline]
     pub fn with_opt<F, R>(f: F) -> R
         where F: for<'a, 'gcx, 'tcx> FnOnce(Option<TyCtxt<'a, 'gcx, 'tcx>>) -> R
     {
diff --git a/src/librustc/ty/query/config.rs b/src/librustc/ty/query/config.rs
index 5d12aaeed5f79..3beda4590ce5c 100644
--- a/src/librustc/ty/query/config.rs
+++ b/src/librustc/ty/query/config.rs
@@ -52,7 +52,8 @@ pub(super) trait QueryAccessors<'tcx>: QueryConfig<'tcx> {
     fn to_dep_node(tcx: TyCtxt<'_, 'tcx, '_>, key: &Self::Key) -> DepNode;
 
     // Don't use this method to compute query results, instead use the methods on TyCtxt
-    fn compute(tcx: TyCtxt<'_, 'tcx, '_>, key: Self::Key) -> Self::Value;
+    #[inline(always)]
+    fn compute(tcx: TyCtxt<'_, 'tcx, 'tcx>, key: Self::Key) -> Self::Value;
 
     fn handle_cycle_error(tcx: TyCtxt<'_, 'tcx, '_>) -> Self::Value;
 }
diff --git a/src/librustc/ty/query/job.rs b/src/librustc/ty/query/job.rs
index 1439e41bb31fd..ff7870652f428 100644
--- a/src/librustc/ty/query/job.rs
+++ b/src/librustc/ty/query/job.rs
@@ -12,14 +12,18 @@
 
 use std::mem;
 use rustc_data_structures::fx::FxHashSet;
-use rustc_data_structures::sync::{Lock, LockGuard, Lrc, Weak};
+use rustc_data_structures::sync::{Lock, LockGuard, Lrc, LrcRef, Weak};
 use rustc_data_structures::OnDrop;
 use syntax_pos::Span;
 use ty::tls;
 use ty::query::Query;
 use ty::query::plumbing::CycleError;
+#[cfg(not(parallel_queries))]
+use ty::query::{
+    plumbing::TryGetJob,
+    config::QueryDescription,
+};
 use ty::context::TyCtxt;
-use errors::Diagnostic;
 use std::process;
 use std::{fmt, ptr};
 
@@ -35,15 +39,6 @@ use {
     rustc_data_structures::stable_hasher::{StableHasherResult, StableHasher, HashStable},
 };
 
-/// Indicates the state of a query for a given key in a query map
-pub(super) enum QueryResult<'tcx> {
-    /// An already executing query. The query job can be used to await for its completion
-    Started(Lrc<QueryJob<'tcx>>),
-
-    /// The query panicked. Queries trying to wait on this will raise a fatal error / silently panic
-    Poisoned,
-}
-
 /// A span and a query key
 #[derive(Clone, Debug)]
 pub struct QueryInfo<'tcx> {
@@ -59,9 +54,6 @@ pub struct QueryJob<'tcx> {
     /// The parent query job which created this job and is implicitly waiting on it.
     pub parent: Option<Lrc<QueryJob<'tcx>>>,
 
-    /// Diagnostic messages which are emitted while the query executes
-    pub diagnostics: Lock<Vec<Diagnostic>>,
-
     /// The latch which is used to wait on this job
     #[cfg(parallel_queries)]
     latch: QueryLatch<'tcx>,
@@ -71,7 +63,6 @@ impl<'tcx> QueryJob<'tcx> {
     /// Creates a new query job
     pub fn new(info: QueryInfo<'tcx>, parent: Option<Lrc<QueryJob<'tcx>>>) -> Self {
         QueryJob {
-            diagnostics: Lock::new(Vec::new()),
             info,
             parent,
             #[cfg(parallel_queries)]
@@ -83,36 +74,44 @@ impl<'tcx> QueryJob<'tcx> {
     ///
     /// For single-threaded rustc there are no concurrent jobs running, so waiting for any
     /// query means there is a query cycle, and thus this always results in a cycle error.
-    pub(super) fn await<'lcx>(
+    #[cfg(not(parallel_queries))]
+    #[inline(never)]
+    #[cold]
+    pub(super) fn await<'lcx, 'a, D: QueryDescription<'tcx>>(
         &self,
         tcx: TyCtxt<'_, 'tcx, 'lcx>,
         span: Span,
-    ) -> Result<(), CycleError<'tcx>> {
-        #[cfg(not(parallel_queries))]
-        {
-            self.find_cycle_in_stack(tcx, span)
-        }
+    ) -> TryGetJob<'a, 'tcx, D> {
+        TryGetJob::JobCompleted(Err(Box::new(self.find_cycle_in_stack(tcx, span))))
+    }
 
-        #[cfg(parallel_queries)]
-        {
-            tls::with_related_context(tcx, move |icx| {
-                let mut waiter = Lrc::new(QueryWaiter {
-                    query: icx.query.clone(),
-                    span,
-                    cycle: Lock::new(None),
-                    condvar: Condvar::new(),
-                });
-                self.latch.await(&waiter);
-                // FIXME: Get rid of this lock. We have ownership of the QueryWaiter
-                // although another thread may still have a Lrc reference so we cannot
-                // use Lrc::get_mut
-                let mut cycle = waiter.cycle.lock();
-                match cycle.take() {
-                    None => Ok(()),
-                    Some(cycle) => Err(cycle)
-                }
-            })
-        }
+    /// Awaits for the query job to complete.
+    ///
+    /// For single-threaded rustc there are no concurrent jobs running, so waiting for any
+    /// query means there is a query cycle, and thus this always results in a cycle error.
+    #[cfg(parallel_queries)]
+    pub(super) fn await<'lcx>(
+        &self,
+        tcx: TyCtxt<'_, 'tcx, 'lcx>,
+        span: Span,
+    ) -> Result<(), Box<CycleError<'tcx>>> {
+        tls::with_related_context(tcx, move |icx| {
+            let mut waiter = Lrc::new(QueryWaiter {
+                query: icx.query.map(|q| LrcRef::into(q)),
+                span,
+                cycle: Lock::new(None),
+                condvar: Condvar::new(),
+            });
+            self.latch.await(&waiter);
+            // FIXME: Get rid of this lock. We have ownership of the QueryWaiter
+            // although another thread may still have a Lrc reference so we cannot
+            // use Lrc::get_mut
+            let mut cycle = waiter.cycle.lock();
+            match cycle.take() {
+                None => Ok(()),
+                Some(cycle) => Err(Box::new(cycle))
+            }
+        })
     }
 
     #[cfg(not(parallel_queries))]
@@ -120,9 +119,11 @@ impl<'tcx> QueryJob<'tcx> {
         &self,
         tcx: TyCtxt<'_, 'tcx, 'lcx>,
         span: Span,
-    ) -> Result<(), CycleError<'tcx>> {
+    ) -> CycleError<'tcx> {
         // Get the current executing query (waiter) and find the waitee amongst its parents
-        let mut current_job = tls::with_related_context(tcx, |icx| icx.query.clone());
+        let mut current_job = tls::with_related_context(tcx, |icx| {
+            icx.query.map(|q| LrcRef::into(q))
+        });
         let mut cycle = Vec::new();
 
         while let Some(job) = current_job {
@@ -140,10 +141,10 @@ impl<'tcx> QueryJob<'tcx> {
                 let usage = job.parent.as_ref().map(|parent| {
                     (job.info.span, parent.info.query.clone())
                 });
-                return Err(CycleError { usage, cycle });
+                return CycleError { usage, cycle };
             }
 
-            current_job = job.parent.clone();
+            current_job = job.parent.as_ref().map(|parent| parent.clone());
         }
 
         panic!("did not find a cycle")
diff --git a/src/librustc/ty/query/mod.rs b/src/librustc/ty/query/mod.rs
index 699c2d111c639..a1fc7feeb4cca 100644
--- a/src/librustc/ty/query/mod.rs
+++ b/src/librustc/ty/query/mod.rs
@@ -705,22 +705,22 @@ impl<'a, 'tcx, 'lcx> TyCtxt<'a, 'tcx, 'lcx> {
         self,
         span: Span,
         key: DefId,
-    ) -> Result<&'tcx [Ty<'tcx>], DiagnosticBuilder<'a>> {
-        self.try_get_query::<queries::adt_sized_constraint<'_>>(span, key)
+    ) -> Result<&'tcx [Ty<'tcx>], Box<DiagnosticBuilder<'a>>> {
+        self.global_tcx().try_get_query::<queries::adt_sized_constraint<'_>>(span, key)
     }
     pub fn try_needs_drop_raw(
         self,
         span: Span,
         key: ty::ParamEnvAnd<'tcx, Ty<'tcx>>,
-    ) -> Result<bool, DiagnosticBuilder<'a>> {
-        self.try_get_query::<queries::needs_drop_raw<'_>>(span, key)
+    ) -> Result<bool, Box<DiagnosticBuilder<'a>>> {
+        self.global_tcx().try_get_query::<queries::needs_drop_raw<'_>>(span, key)
     }
     pub fn try_optimized_mir(
         self,
         span: Span,
         key: DefId,
-    ) -> Result<&'tcx mir::Mir<'tcx>, DiagnosticBuilder<'a>> {
-        self.try_get_query::<queries::optimized_mir<'_>>(span, key)
+    ) -> Result<&'tcx mir::Mir<'tcx>, Box<DiagnosticBuilder<'a>>> {
+        self.global_tcx().try_get_query::<queries::optimized_mir<'_>>(span, key)
     }
 }
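
The `Box<DiagnosticBuilder>` returns above exist because `Result<T, E>` must be
at least as large as its largest variant, so an unboxed error type inflates every
call that returns through the happy path. A quick, self-contained illustration
(sizes are platform-dependent; `BigError` is a stand-in for `DiagnosticBuilder`):

    use std::mem::size_of;

    struct BigError([u64; 16]); // stand-in for a large error type

    fn main() {
        // The unboxed Result reserves space for the error inline...
        assert!(size_of::<Result<u32, BigError>>() > size_of::<BigError>());
        // ...while boxing keeps the whole Result at a couple of words.
        assert!(size_of::<Result<u32, Box<BigError>>>() <= 2 * size_of::<usize>());
    }
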
 
diff --git a/src/librustc/ty/query/on_disk_cache.rs b/src/librustc/ty/query/on_disk_cache.rs
index 7d3ae64f4fcd6..b81dc67d2e8f6 100644
--- a/src/librustc/ty/query/on_disk_cache.rs
+++ b/src/librustc/ty/query/on_disk_cache.rs
@@ -351,11 +351,13 @@ impl<'sess> OnDiskCache<'sess> {
     /// Store a diagnostic emitted during the current compilation session.
     /// Anything stored like this will be available via `load_diagnostics` in
     /// the next compilation session.
+    #[inline(never)]
+    #[cold]
     pub fn store_diagnostics(&self,
                              dep_node_index: DepNodeIndex,
-                             diagnostics: Vec<Diagnostic>) {
+                             diagnostics: Box<Vec<Diagnostic>>) {
         let mut current_diagnostics = self.current_diagnostics.borrow_mut();
-        let prev = current_diagnostics.insert(dep_node_index, diagnostics);
+        let prev = current_diagnostics.insert(dep_node_index, *diagnostics);
         debug_assert!(prev.is_none());
     }
 
@@ -377,13 +379,15 @@ impl<'sess> OnDiskCache<'sess> {
     /// Since many anonymous queries can share the same `DepNode`, we aggregate
     /// them -- as opposed to regular queries where we assume that there is a
     /// 1:1 relationship between query-key and `DepNode`.
+    #[inline(never)]
+    #[cold]
     pub fn store_diagnostics_for_anon_node(&self,
                                            dep_node_index: DepNodeIndex,
-                                           mut diagnostics: Vec<Diagnostic>) {
+                                           mut diagnostics: Box<Vec<Diagnostic>>) {
         let mut current_diagnostics = self.current_diagnostics.borrow_mut();
 
         let x = current_diagnostics.entry(dep_node_index).or_insert_with(|| {
-            mem::replace(&mut diagnostics, Vec::new())
+            mem::replace(&mut *diagnostics, Vec::new())
         });
 
         x.extend(diagnostics.into_iter());
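
Both `store_diagnostics*` methods are now `#[inline(never)] #[cold]`: queries
rarely emit diagnostics, so the allocation-and-insert code is outlined and the
hot caller only passes a single `Box` pointer when it actually has something to
store. A generic sketch of this outlining pattern (the function names here are
made up for illustration):

    // Hot function: the common, empty case stays small and inlinable.
    #[inline]
    fn record(slot: &mut Option<Box<Vec<String>>>, msg: Option<String>) {
        if let Some(msg) = msg {
            record_slow(slot, msg); // out of line, marked cold
        }
    }

    // Cold function: allocation and growth live here, keeping their
    // code and register pressure out of the hot path.
    #[inline(never)]
    #[cold]
    fn record_slow(slot: &mut Option<Box<Vec<String>>>, msg: String) {
        slot.get_or_insert_with(|| Box::new(Vec::new())).push(msg);
    }
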
diff --git a/src/librustc/ty/query/plumbing.rs b/src/librustc/ty/query/plumbing.rs
index 5f33d466c4a19..8f956f32dc373 100644
--- a/src/librustc/ty/query/plumbing.rs
+++ b/src/librustc/ty/query/plumbing.rs
@@ -12,31 +12,36 @@
 //! that generate the actual methods on tcx which find and execute the
 //! provider, manage the caches, and so forth.
 
-use dep_graph::{DepNodeIndex, DepNode, DepKind, DepNodeColor};
+use dep_graph::{DepNodeIndex, DepNode, DepKind, DepNodeColor, OpenTask};
 use errors::DiagnosticBuilder;
 use errors::Level;
 use errors::Diagnostic;
-use errors::FatalError;
 use ty::tls;
 use ty::{TyCtxt};
 use ty::query::Query;
 use ty::query::config::{QueryConfig, QueryDescription};
-use ty::query::job::{QueryJob, QueryResult, QueryInfo};
+use ty::query::job::{QueryJob, QueryInfo};
 use ty::item_path;
 
 use util::common::{profq_msg, ProfileQueriesMsg, QueryMsg};
 
 use rustc_data_structures::fx::{FxHashMap};
-use rustc_data_structures::sync::{Lrc, Lock};
+use rustc_data_structures::sync::{Lrc, LrcRef, Lock};
+use rustc_data_structures::by_move::{Move, MoveSlot};
+use std::hash::{Hash, Hasher, BuildHasher};
 use std::mem;
 use std::ptr;
-use std::collections::hash_map::Entry;
+use std::collections::hash_map::RawEntryMut;
 use syntax_pos::Span;
 use syntax::source_map::DUMMY_SP;
 
 pub struct QueryCache<'tcx, D: QueryConfig<'tcx> + ?Sized> {
+    /// Completed queries have their result stored here
     pub(super) results: FxHashMap<D::Key, QueryValue<D::Value>>,
-    pub(super) active: FxHashMap<D::Key, QueryResult<'tcx>>,
+
+    /// Queries under execution will have an entry in this map.
+    /// The query job inside can be used to wait for the query's completion.
+    pub(super) active: FxHashMap<D::Key, Lrc<QueryJob<'tcx>>>,
 }
 
 pub(super) struct QueryValue<T> {
@@ -96,7 +101,77 @@ macro_rules! profq_query_msg {
 pub(super) struct JobOwner<'a, 'tcx: 'a, Q: QueryDescription<'tcx> + 'a> {
     cache: &'a Lock<QueryCache<'tcx, Q>>,
     key: Q::Key,
+    key_hash: u64,
     job: Lrc<QueryJob<'tcx>>,
+    // FIXME: Remove ImplicitCtxt.layout_depth to get rid of this field
+    layout_depth: usize,
+}
+
+use hir::def_id::DefId;
+
+use std::mem::MaybeUninit;
+#[inline(never)]
+pub fn may_panic<'tcx>(job: Lrc<QueryJob<'tcx>>) {
+    Box::new(job);
+}
+
+#[no_mangle]
+fn test_moves2<'a, 'tcx>(
+    tcx: TyCtxt<'a, 'tcx, 'tcx>,
+    key: &DefId,
+    key_hash: u64,
+    span: Span,
+    parent: &Option<Lrc<QueryJob<'tcx>>>,
+    mut job_storage: MoveSlot<'a, JobOwner<'a, 'tcx, ::ty::query::queries::type_of<'tcx>>>,
+) -> TryGetJob<'a, 'tcx, ::ty::query::queries::type_of<'tcx>> {
+    use ty::query::config::QueryAccessors;
+    let mut job = Lrc::new(MaybeUninit::uninitialized());
+    let parent = parent.clone();
+    let info = QueryInfo {
+        span,
+        query: ::ty::query::queries::type_of::query(*key),
+    };
+    *Lrc::get_mut(&mut job).unwrap() =
+        MaybeUninit::new(QueryJob::new(info, parent.clone()));
+    let job: Lrc<QueryJob<'tcx>> = unsafe { std::mem::transmute(job) };
+    let job_clone = job.clone();
+    may_panic(job);
+    let owner = job_storage.init(JobOwner {
+        cache: ::ty::query::queries::type_of::query_cache(tcx),
+        job: job_clone,
+        key: (*key).clone(),
+        key_hash,
+        layout_depth: 83952,
+    });
+    TryGetJob::NotYetStarted(owner)
+}
+
+#[no_mangle]
+fn test_moves<'a, 'tcx>(
+    tcx: TyCtxt<'a, 'tcx, 'tcx>,
+    key: &::hir::def_id::DefId,
+    key_hash: u64,
+    span: Span,
+    mut job_storage: MoveSlot<'a, JobOwner<'a, 'tcx, ::ty::query::queries::type_of<'tcx>>>,
+) -> TryGetJob<'a, 'tcx, ::ty::query::queries::type_of<'tcx>> {
+    use ty::query::config::QueryAccessors;
+    tls::with_related_context(tcx, |icx| {
+        let mut job = Lrc::new(MaybeUninit::uninitialized());
+        let info = QueryInfo {
+            span,
+            query: ::ty::query::queries::type_of::query(*key),
+        };
+        let parent = icx.query.map(|q| LrcRef::into(q));
+        *Lrc::get_mut(&mut job).unwrap() =
+            MaybeUninit::new(QueryJob::new(info, parent));
+        let job: Lrc<QueryJob<'tcx>> = unsafe { std::mem::transmute(job) };
+        let job_clone = job.clone();
+        may_panic(job);
+        let owner = job_storage.init(JobOwner {
+            cache: ::ty::query::queries::type_of::query_cache(tcx),
+            job: job_clone,
+            key: (*key).clone(),
+            key_hash,
+            layout_depth: icx.layout_depth,
+        });
+        TryGetJob::NotYetStarted(owner)
+    })
 }
 
 impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
@@ -112,11 +187,21 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
         tcx: TyCtxt<'a, 'tcx, '_>,
         span: Span,
         key: &Q::Key,
+        mut job_storage: MoveSlot<'a, JobOwner<'a, 'tcx, Q>>,
     ) -> TryGetJob<'a, 'tcx, Q> {
         let cache = Q::query_cache(tcx);
         loop {
             let mut lock = cache.borrow_mut();
-            if let Some(value) = lock.results.get(key) {
+
+            // Calculate the key hash
+            let mut hasher = lock.results.hasher().build_hasher();
+            key.hash(&mut hasher);
+            let key_hash = hasher.finish();
+
+            // QueryCache::results and QueryCache::active use the same
+            // hasher so we can reuse the hash for the key
+            if let Some((_, value)) = lock.results.raw_entry()
+                                          .from_key_hashed_nocheck(key_hash, key) {
                 profq_msg!(tcx, ProfileQueriesMsg::CacheHit);
                 tcx.sess.profiler(|p| {
                     p.record_query(Q::CATEGORY);
@@ -126,45 +211,56 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
                 let result = Ok((value.value.clone(), value.index));
                 return TryGetJob::JobCompleted(result);
             }
-            let job = match lock.active.entry((*key).clone()) {
-                Entry::Occupied(entry) => {
-                    match *entry.get() {
-                        QueryResult::Started(ref job) => job.clone(),
-                        QueryResult::Poisoned => FatalError.raise(),
-                    }
-                }
-                Entry::Vacant(entry) => {
+            let job = match lock.active.raw_entry_mut().from_key_hashed_nocheck(key_hash, key) {
+                RawEntryMut::Occupied(entry) => entry.get().clone(),
+                RawEntryMut::Vacant(entry) => {
                     // No job entry for this query. Return a new one to be started later
                     return tls::with_related_context(tcx, |icx| {
+                        // Create the `parent` variable before `info`. This allows LLVM
+                        // to elide the move of `info`
+                        let parent = icx.query.map(|q| LrcRef::into(q));
                         let info = QueryInfo {
                             span,
                             query: Q::query(key.clone()),
                         };
-                        let job = Lrc::new(QueryJob::new(info, icx.query.clone()));
-                        let owner = JobOwner {
+                        let job = Lrc::new(QueryJob::new(info, parent));
+                        let owner = job_storage.init(JobOwner {
                             cache,
                             job: job.clone(),
                             key: (*key).clone(),
-                        };
-                        entry.insert(QueryResult::Started(job));
+                            key_hash,
+                            layout_depth: icx.layout_depth,
+                        });
+                        entry.insert_hashed_nocheck(
+                            key_hash,
+                            key.clone(),
+                            job);
                         TryGetJob::NotYetStarted(owner)
                     })
                 }
             };
             mem::drop(lock);
 
-            if let Err(cycle) = job.await(tcx, span) {
-                return TryGetJob::JobCompleted(Err(cycle));
+            #[cfg(not(parallel_queries))]
+            return job.await(tcx, span);
+
+            #[cfg(parallel_queries)]
+            {
+                if let Err(cycle) = job.await(tcx, span) {
+                    return TryGetJob::JobCompleted(Err(cycle));
+                }
             }
         }
     }
 
     /// Completes the query by updating the query cache with the `result`,
     /// signals the waiter and forgets the JobOwner, so it won't poison the query
-    pub(super) fn complete(self, result: &Q::Value, dep_node_index: DepNodeIndex) {
+    #[inline]
+    pub(super) fn complete(self: Move<'_, Self>, result: &Q::Value, dep_node_index: DepNodeIndex) {
         // We can move out of `self` here because we `mem::forget` it below
         let key = unsafe { ptr::read(&self.key) };
         let job = unsafe { ptr::read(&self.job) };
+        let key_hash = self.key_hash;
         let cache = self.cache;
 
         // Forget ourself so our destructor won't poison the query
@@ -173,8 +269,13 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
         let value = QueryValue::new(result.clone(), dep_node_index);
         {
             let mut lock = cache.borrow_mut();
-            lock.active.remove(&key);
-            lock.results.insert(key, value);
+
+            match lock.active.raw_entry_mut().from_key_hashed_nocheck(key_hash, &key) {
+                RawEntryMut::Occupied(entry) => entry.remove(),
+                _ => unreachable!(),
+            };
+            lock.results.raw_entry_mut().from_key_hashed_nocheck(key_hash, &key)
+                                        .or_insert(key, value);
         }
 
         job.signal_complete();
@@ -183,45 +284,51 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
     /// Executes a job by changing the ImplicitCtxt to point to the
     /// new query job while it executes. It returns the diagnostics
     /// captured during execution and the actual result.
-    pub(super) fn start<'lcx, F, R>(
+    #[inline(always)]
+    pub(super) fn compute(
         &self,
-        tcx: TyCtxt<'_, 'tcx, 'lcx>,
-        compute: F)
-    -> (R, Vec<Diagnostic>)
-    where
-        F: for<'b> FnOnce(TyCtxt<'b, 'tcx, 'lcx>) -> R
+        tcx: TyCtxt<'_, 'tcx, 'tcx>,
+        task: &OpenTask,
+        diagnostics: Option<&Lock<Option<Box<Vec<Diagnostic>>>>>,
+        key: Q::Key,
+    ) -> Q::Value
     {
-        // The TyCtxt stored in TLS has the same global interner lifetime
-        // as `tcx`, so we use `with_related_context` to relate the 'gcx lifetimes
-        // when accessing the ImplicitCtxt
-        let r = tls::with_related_context(tcx, move |current_icx| {
-            // Update the ImplicitCtxt to point to our new query job
-            let new_icx = tls::ImplicitCtxt {
-                tcx,
-                query: Some(self.job.clone()),
-                layout_depth: current_icx.layout_depth,
-                task: current_icx.task,
-            };
+        // Update the ImplicitCtxt to point to our new query job
+        let new_icx = tls::ImplicitCtxt {
+            tcx,
+            query: Some(LrcRef::new(&self.job)),
+            diagnostics,
+            layout_depth: self.layout_depth,
+            task,
+        };
 
-            // Use the ImplicitCtxt while we execute the query
-            tls::enter_context(&new_icx, |_| {
-                compute(tcx)
-            })
-        });
+        // Use the ImplicitCtxt while we execute the query
+        tls::enter_context(&new_icx, |_| {
+            Q::compute(tcx, key)
+        })
+    }
 
-        // Extract the diagnostic from the job
-        let diagnostics = mem::replace(&mut *self.job.diagnostics.lock(), Vec::new());
+}
 
-        (r, diagnostics)
-    }
+#[inline(always)]
+fn with_diagnostics<F, R>(f: F) -> (R, Option<Box<Vec<Diagnostic>>>)
+where
+    F: FnOnce(Option<&Lock<Option<Box<Vec<Diagnostic>>>>>) -> R
+{
+    let diagnostics = Lock::new(None);
+    let result = f(Some(&diagnostics));
+    (result, diagnostics.into_inner())
 }
 
 impl<'a, 'tcx, Q: QueryDescription<'tcx>> Drop for JobOwner<'a, 'tcx, Q> {
+    #[inline(never)]
+    #[cold]
     fn drop(&mut self) {
-        // Poison the query so jobs waiting on it panic
-        self.cache.borrow_mut().active.insert(self.key.clone(), QueryResult::Poisoned);
-        // Also signal the completion of the job, so waiters
-        // will continue execution
+        // This job failed to execute due to a panic.
+        // Remove it from the list of active queries
+        self.cache.borrow_mut().active.remove(&self.key);
+        // Signal that the job no longer executes, so the waiters will continue execution.
+        // The waiters will try to execute the query which may result in them panicking too.
         self.job.signal_complete();
     }
 }
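
The raw-entry calls in `try_get` and `complete` work because `results` and
`active` are built with the same kind of hasher, so one hash computation can
serve lookups and insertions in both maps. A standalone sketch of the pattern
on plain `HashMap`s (this needed the nightly `hash_raw_entry` feature at the
time; the shared `RandomState` plays the role of the stateless Fx hasher):

    #![feature(hash_raw_entry)]
    use std::collections::HashMap;
    use std::collections::hash_map::RandomState;
    use std::hash::{BuildHasher, Hash, Hasher};

    fn main() {
        // Hash reuse is only sound if both maps hash identically.
        let state = RandomState::new();
        let results: HashMap<u32, String, _> = HashMap::with_hasher(state.clone());
        let mut active: HashMap<u32, String, _> = HashMap::with_hasher(state);

        let key = 42u32;

        // Hash the key once...
        let mut hasher = results.hasher().build_hasher();
        key.hash(&mut hasher);
        let hash = hasher.finish();

        // ...reuse it for a lookup in one map...
        if results.raw_entry().from_key_hashed_nocheck(hash, &key).is_none() {
            // ...and for an insertion into the other, with no second hash pass.
            active.raw_entry_mut()
                  .from_key_hashed_nocheck(hash, &key)
                  .or_insert(key, String::from("running"));
        }
    }
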
@@ -234,19 +341,23 @@ pub struct CycleError<'tcx> {
 }
 
 /// The result of `JobOwner::try_get`
-pub(super) enum TryGetJob<'a, 'tcx: 'a, D: QueryDescription<'tcx> + 'a> {
+pub(super) enum TryGetJob<'a, 'tcx: 'a, Q: QueryDescription<'tcx> + 'a> {
     /// The query is not yet started. Contains a guard to the cache eventually used to start it.
-    NotYetStarted(JobOwner<'a, 'tcx, D>),
+    NotYetStarted(Move<'a, JobOwner<'a, 'tcx, Q>>),
 
     /// The query was already completed.
     /// Returns the result of the query and its dep node index
     /// if it succeeded or a cycle error if it failed
-    JobCompleted(Result<(D::Value, DepNodeIndex), CycleError<'tcx>>),
+    JobCompleted(Result<(Q::Value, DepNodeIndex), Box<CycleError<'tcx>>>),
 }
 
-impl<'a, 'gcx, 'tcx> TyCtxt<'a, 'gcx, 'tcx> {
-    pub(super) fn report_cycle(self, CycleError { usage, cycle: stack }: CycleError<'gcx>)
-        -> DiagnosticBuilder<'a>
+impl<'a, 'gcx> TyCtxt<'a, 'gcx, 'gcx> {
+    #[inline(never)]
+    #[cold]
+    pub(super) fn report_cycle(
+        self,
+        box CycleError { usage, cycle: stack }: Box<CycleError<'gcx>>
+    ) -> Box<DiagnosticBuilder<'a>>
     {
         assert!(!stack.is_empty());
 
@@ -280,7 +391,7 @@ impl<'a, 'gcx, 'tcx> TyCtxt<'a, 'gcx, 'tcx> {
                               &format!("cycle used when {}", query.describe(self)));
             }
 
-            return err
+            return Box::new(err)
         })
     }
 
@@ -289,7 +400,7 @@ impl<'a, 'gcx, 'tcx> TyCtxt<'a, 'gcx, 'tcx> {
 
         tls::with_context_opt(|icx| {
             if let Some(icx) = icx {
-                let mut current_query = icx.query.clone();
+                let mut current_query = icx.query.map(|q| LrcRef::into(q));
                 let mut i = 0;
 
                 while let Some(query) = current_query {
@@ -331,7 +442,7 @@ impl<'a, 'gcx, 'tcx> TyCtxt<'a, 'gcx, 'tcx> {
                 if !self.dep_graph.is_fully_enabled() {
                     return None;
                 }
-                match self.dep_graph.try_mark_green(self.global_tcx(), &dep_node) {
+                match self.dep_graph.try_mark_green(self, &dep_node) {
                     Some(dep_node_index) => {
                         debug_assert!(self.dep_graph.is_green(&dep_node));
                         self.dep_graph.read_index(dep_node_index);
@@ -345,11 +456,12 @@ impl<'a, 'gcx, 'tcx> TyCtxt<'a, 'gcx, 'tcx> {
         }
     }
 
+    #[inline(never)]
     fn try_get_with<Q: QueryDescription<'gcx>>(
         self,
         span: Span,
         key: Q::Key)
-    -> Result<Q::Value, CycleError<'gcx>>
+    -> Result<Q::Value, Box<CycleError<'gcx>>>
     {
         debug!("ty::queries::{}::try_get_with(key={:?}, span={:?})",
                Q::NAME,
@@ -365,7 +477,8 @@ impl<'a, 'gcx, 'tcx> TyCtxt<'a, 'gcx, 'tcx> {
 
         self.sess.profiler(|p| p.record_query(Q::CATEGORY));
 
-        let job = match JobOwner::try_get(self, span, &key) {
+        let job_slot = uninit_slot!();
+        let job = match JobOwner::try_get(self, span, &key, Move::uninit(job_slot)) {
             TryGetJob::NotYetStarted(job) => job,
             TryGetJob::JobCompleted(result) => {
                 return result.map(|(v, index)| {
@@ -389,27 +502,29 @@ impl<'a, 'gcx, 'tcx> TyCtxt<'a, 'gcx, 'tcx> {
             profq_msg!(self, ProfileQueriesMsg::ProviderBegin);
             self.sess.profiler(|p| p.start_activity(Q::CATEGORY));
 
-            let res = job.start(self, |tcx| {
-                tcx.dep_graph.with_anon_task(dep_node.kind, || {
-                    Q::compute(tcx.global_tcx(), key)
+            let (res, diag) = with_diagnostics(|diagnostics| {
+                self.dep_graph.with_anon_open_task(dep_node.kind, |open_task| {
+                    job.compute(self, open_task, diagnostics, key)
                 })
             });
 
             self.sess.profiler(|p| p.end_activity(Q::CATEGORY));
             profq_msg!(self, ProfileQueriesMsg::ProviderEnd);
-            let ((result, dep_node_index), diagnostics) = res;
+            let (result, dep_node_index) = res;
 
             self.dep_graph.read_index(dep_node_index);
 
-            self.queries.on_disk_cache
-                .store_diagnostics_for_anon_node(dep_node_index, diagnostics);
+            if let Some(diagnostics) = diag {
+                self.queries.on_disk_cache
+                    .store_diagnostics_for_anon_node(dep_node_index, diagnostics);
+            }
 
             job.complete(&result, dep_node_index);
 
             return Ok(result);
         }
 
-        if !dep_node.kind.is_input() {
+        if !dep_node.kind.is_input_inlined() {
             if let Some(dep_node_index) = self.try_mark_green_and_read(&dep_node) {
                 profq_msg!(self, ProfileQueriesMsg::CacheHit);
                 self.sess.profiler(|p| p.record_query_hit(Q::CATEGORY));
@@ -433,10 +548,10 @@ impl<'a, 'gcx, 'tcx> TyCtxt<'a, 'gcx, 'tcx> {
     fn load_from_disk_and_cache_in_memory<Q: QueryDescription<'gcx>>(
         self,
         key: Q::Key,
-        job: JobOwner<'a, 'gcx, Q>,
+        job: Move<'_, JobOwner<'_, 'gcx, Q>>,
         dep_node_index: DepNodeIndex,
         dep_node: &DepNode
-    ) -> Result<Q::Value, CycleError<'gcx>>
+    ) -> Result<Q::Value, Box<CycleError<'gcx>>>
     {
         // Note this function can be called concurrently from the same query
         // We must ensure that this is handled correctly
@@ -448,8 +563,7 @@ impl<'a, 'gcx, 'tcx> TyCtxt<'a, 'gcx, 'tcx> {
                         self.sess.opts.debugging_opts.incremental_queries {
             let prev_dep_node_index =
                 self.dep_graph.prev_dep_node_index_of(dep_node);
-            let result = Q::try_load_from_disk(self.global_tcx(),
-                                               prev_dep_node_index);
+            let result = Q::try_load_from_disk(self, prev_dep_node_index);
 
             // We always expect to find a cached result for things that
             // can be forced from DepNode.
@@ -472,57 +586,65 @@ impl<'a, 'gcx, 'tcx> TyCtxt<'a, 'gcx, 'tcx> {
             // The diagnostics for this query have already been
             // promoted to the current session during
             // try_mark_green(), so we can ignore them here.
-            let (result, _) = job.start(self, |tcx| {
-                // The dep-graph for this computation is already in
-                // place
-                tcx.dep_graph.with_ignore(|| {
-                    Q::compute(tcx, key)
-                })
-            });
-            result
+            // The dep-graph for this computation is already in
+            // place so we pass OpenTask::Ignore.
+            job.compute(self, &OpenTask::Ignore, None, key)
         };
 
         // If -Zincremental-verify-ich is specified, re-hash results from
         // the cache and make sure that they have the expected fingerprint.
-        if self.sess.opts.debugging_opts.incremental_verify_ich {
-            use rustc_data_structures::stable_hasher::{StableHasher, HashStable};
-            use ich::Fingerprint;
+        if unlikely!(self.sess.opts.debugging_opts.incremental_verify_ich) {
+            self.incremental_verify_ich::<Q>(&result, dep_node, dep_node_index);
+        }
 
-            assert!(Some(self.dep_graph.fingerprint_of(dep_node_index)) ==
-                    self.dep_graph.prev_fingerprint_of(dep_node),
-                    "Fingerprint for green query instance not loaded \
-                     from cache: {:?}", dep_node);
+        if unlikely!(self.sess.opts.debugging_opts.query_dep_graph) {
+            self.dep_graph.mark_loaded_from_cache(dep_node_index, true);
+        }
 
-            debug!("BEGIN verify_ich({:?})", dep_node);
-            let mut hcx = self.create_stable_hashing_context();
-            let mut hasher = StableHasher::new();
+        job.complete(&result, dep_node_index);
+
+        Ok(result)
+    }
 
-            result.hash_stable(&mut hcx, &mut hasher);
+    #[inline(never)]
+    #[cold]
+    fn incremental_verify_ich<Q: QueryDescription<'gcx>>(
+        self,
+        result: &Q::Value,
+        dep_node: &DepNode,
+        dep_node_index: DepNodeIndex,
+    ) {
+        use rustc_data_structures::stable_hasher::{StableHasher, HashStable};
+        use ich::Fingerprint;
 
-            let new_hash: Fingerprint = hasher.finish();
-            debug!("END verify_ich({:?})", dep_node);
+        assert!(Some(self.dep_graph.fingerprint_of(dep_node_index)) ==
+                self.dep_graph.prev_fingerprint_of(dep_node),
+                "Fingerprint for green query instance not loaded \
+                    from cache: {:?}", dep_node);
 
-            let old_hash = self.dep_graph.fingerprint_of(dep_node_index);
+        debug!("BEGIN verify_ich({:?})", dep_node);
+        let mut hcx = self.create_stable_hashing_context();
+        let mut hasher = StableHasher::new();
 
-            assert!(new_hash == old_hash, "Found unstable fingerprints \
-                for {:?}", dep_node);
-        }
+        result.hash_stable(&mut hcx, &mut hasher);
 
-        if self.sess.opts.debugging_opts.query_dep_graph {
-            self.dep_graph.mark_loaded_from_cache(dep_node_index, true);
-        }
+        let new_hash: Fingerprint = hasher.finish();
+        debug!("END verify_ich({:?})", dep_node);
 
-        job.complete(&result, dep_node_index);
+        let old_hash = self.dep_graph.fingerprint_of(dep_node_index);
 
-        Ok(result)
+        assert!(new_hash == old_hash, "Found unstable fingerprints \
+            for {:?}", dep_node);
     }
 
+    // Inlined so LLVM can tell what kind of DepNode we are using
+    #[inline(always)]
     fn force_query_with_job<Q: QueryDescription<'gcx>>(
         self,
         key: Q::Key,
-        job: JobOwner<'_, 'gcx, Q>,
+        job: Move<'_, JobOwner<'_, 'gcx, Q>>,
         dep_node: DepNode)
-    -> Result<(Q::Value, DepNodeIndex), CycleError<'gcx>> {
+    -> Result<(Q::Value, DepNodeIndex), Box<CycleError<'gcx>>> {
         // If the following assertion triggers, it can have two reasons:
         // 1. Something is wrong with DepNode creation, either here or
         //    in DepGraph::try_mark_green()
@@ -540,32 +662,30 @@ impl<'a, 'gcx, 'tcx> TyCtxt<'a, 'gcx, 'tcx> {
             p.record_query(Q::CATEGORY);
         });
 
-        let res = job.start(self, |tcx| {
+        let ((result, dep_node_index), diagnostics) = with_diagnostics(|diagnostics| {
             if dep_node.kind.is_eval_always() {
-                tcx.dep_graph.with_eval_always_task(dep_node,
-                                                    tcx,
-                                                    key,
-                                                    Q::compute)
+                self.dep_graph.with_eval_always_task(self, dep_node, |task| {
+                    job.compute(self, task, diagnostics, key)
+                })
             } else {
-                tcx.dep_graph.with_task(dep_node,
-                                        tcx,
-                                        key,
-                                        Q::compute)
+                self.dep_graph.with_query_task(self, dep_node, |task| {
+                    job.compute(self, task, diagnostics, key)
+                })
             }
         });
 
         self.sess.profiler(|p| p.end_activity(Q::CATEGORY));
         profq_msg!(self, ProfileQueriesMsg::ProviderEnd);
 
-        let ((result, dep_node_index), diagnostics) = res;
-
-        if self.sess.opts.debugging_opts.query_dep_graph {
+        if unlikely!(self.sess.opts.debugging_opts.query_dep_graph) {
             self.dep_graph.mark_loaded_from_cache(dep_node_index, false);
         }
 
         if dep_node.kind != ::dep_graph::DepKind::Null {
-            self.queries.on_disk_cache
-                .store_diagnostics(dep_node_index, diagnostics);
+            if let Some(diagnostics) = diagnostics {
+                self.queries.on_disk_cache
+                    .store_diagnostics(dep_node_index, diagnostics);
+            }
         }
 
         job.complete(&result, dep_node_index);
@@ -585,7 +705,7 @@ impl<'a, 'gcx, 'tcx> TyCtxt<'a, 'gcx, 'tcx> {
 
         // Ensuring an "input" or anonymous query makes no sense
         assert!(!dep_node.kind.is_anon());
-        assert!(!dep_node.kind.is_input());
+        assert!(!dep_node.kind.is_input_inlined());
         if self.try_mark_green_and_read(&dep_node).is_none() {
             // A None return from `try_mark_green_and_read` means that this is either
             // a new dep node or that the dep node has already been marked red.
@@ -611,37 +731,56 @@ impl<'a, 'gcx, 'tcx> TyCtxt<'a, 'gcx, 'tcx> {
         key: Q::Key,
         span: Span,
         dep_node: DepNode
-    ) -> Result<(Q::Value, DepNodeIndex), CycleError<'gcx>> {
+    ) {
+        profq_msg!(
+            self,
+            ProfileQueriesMsg::QueryBegin(span.data(), profq_query_msg!(Q::NAME, self, key))
+        );
+
         // We may be concurrently trying both execute and force a query
         // Ensure that only one of them runs the query
-        let job = match JobOwner::try_get(self, span, &key) {
+        let job_slot = uninit_slot!();
+        let job = match JobOwner::try_get(self, span, &key, Move::uninit(job_slot)) {
             TryGetJob::NotYetStarted(job) => job,
-            TryGetJob::JobCompleted(result) => return result,
+            TryGetJob::JobCompleted(_) => return,
         };
-        self.force_query_with_job::<Q>(key, job, dep_node)
+        if let Err(e) = self.force_query_with_job::<Q>(key, job, dep_node) {
+            self.report_cycle(e).emit();
+        }
     }
 
     pub(super) fn try_get_query<Q: QueryDescription<'gcx>>(
         self,
         span: Span,
         key: Q::Key,
-    ) -> Result<Q::Value, DiagnosticBuilder<'a>> {
+    ) -> Result<Q::Value, Box<DiagnosticBuilder<'a>>> {
         match self.try_get_with::<Q>(span, key) {
             Ok(e) => Ok(e),
             Err(e) => Err(self.report_cycle(e)),
         }
     }
 
+    // FIXME: Try uninlining this
+    #[inline(always)]
     pub(super) fn get_query<Q: QueryDescription<'gcx>>(
         self,
         span: Span,
         key: Q::Key,
     ) -> Q::Value {
-        self.try_get_query::<Q>(span, key).unwrap_or_else(|mut e| {
-            e.emit();
-            Q::handle_cycle_error(self)
+        self.try_get_with::<Q>(span, key).unwrap_or_else(|e| {
+            self.emit_error::<Q>(e)
         })
     }
+
+    #[inline(never)]
+    #[cold]
+    fn emit_error<Q: QueryDescription<'gcx>>(
+        self,
+        e: Box<CycleError<'gcx>>,
+    ) -> Q::Value {
+        self.report_cycle(e).emit();
+        Q::handle_cycle_error(self)
+    }
 }
 
 macro_rules! handle_cycle_error {
@@ -673,8 +812,6 @@ macro_rules! define_queries_inner {
         [$($modifiers:tt)*] fn $name:ident: $node:ident($K:ty) -> $V:ty,)*) => {
 
         use std::mem;
-        #[cfg(parallel_queries)]
-        use ty::query::job::QueryResult;
         use rustc_data_structures::sync::Lock;
         use {
             rustc_data_structures::stable_hasher::HashStable,
@@ -711,13 +848,7 @@ macro_rules! define_queries_inner {
                 // deadlock handler, and this shouldn't be locked
                 $(
                     jobs.extend(
-                        self.$name.try_lock().unwrap().active.values().filter_map(|v|
-                            if let QueryResult::Started(ref job) = *v {
-                                Some(job.clone())
-                            } else {
-                                None
-                            }
-                        )
+                        self.$name.try_lock().unwrap().active.values().cloned()
                     );
                 )*
 
@@ -787,16 +918,6 @@ macro_rules! define_queries_inner {
             })*
         }
 
-        // This module and the functions in it exist only to provide a
-        // predictable symbol name prefix for query providers. This is helpful
-        // for analyzing queries in profilers.
-        pub(super) mod __query_compute {
-            $(#[inline(never)]
-            pub fn $name<F: FnOnce() -> R, R>(f: F) -> R {
-                f()
-            })*
-        }
-
         $(impl<$tcx> QueryConfig<$tcx> for queries::$name<$tcx> {
             type Key = $K;
             type Value = $V;
@@ -806,33 +927,37 @@ macro_rules! define_queries_inner {
         }
 
         impl<$tcx> QueryAccessors<$tcx> for queries::$name<$tcx> {
+            #[inline(always)]
             fn query(key: Self::Key) -> Query<'tcx> {
                 Query::$name(key)
             }
 
+            #[inline(always)]
             fn query_cache<'a>(tcx: TyCtxt<'a, $tcx, '_>) -> &'a Lock<QueryCache<$tcx, Self>> {
                 &tcx.queries.$name
             }
 
             #[allow(unused)]
+            #[inline(always)]
             fn to_dep_node(tcx: TyCtxt<'_, $tcx, '_>, key: &Self::Key) -> DepNode {
                 use dep_graph::DepConstructor::*;
 
-                DepNode::new(tcx, $node(*key))
+                DepNode::new_inlined(tcx, $node(*key))
             }
 
-            #[inline]
-            fn compute(tcx: TyCtxt<'_, 'tcx, '_>, key: Self::Key) -> Self::Value {
-                __query_compute::$name(move || {
-                    let provider = tcx.queries.providers.get(key.query_crate())
-                        // HACK(eddyb) it's possible crates may be loaded after
-                        // the query engine is created, and because crate loading
-                        // is not yet integrated with the query engine, such crates
-                        // would be be missing appropriate entries in `providers`.
-                        .unwrap_or(&tcx.queries.fallback_extern_providers)
-                        .$name;
-                    provider(tcx.global_tcx(), key)
-                })
+            #[inline(always)]
+            fn compute(
+                tcx: TyCtxt<'_, 'tcx, 'tcx>,
+                key: Self::Key
+            ) -> Self::Value {
+                let provider = tcx.queries.providers.get(key.query_crate())
+                    // HACK(eddyb) it's possible crates may be loaded after
+                    // the query engine is created, and because crate loading
+                    // is not yet integrated with the query engine, such crates
+                    // would be missing appropriate entries in `providers`.
+                    .unwrap_or(&tcx.queries.fallback_extern_providers)
+                    .$name;
+                provider(tcx, key)
             }
 
             fn handle_cycle_error(tcx: TyCtxt<'_, 'tcx, '_>) -> Self::Value {
@@ -849,7 +974,7 @@ macro_rules! define_queries_inner {
             ///
             /// Note: The optimization is only available during incr. comp.
             pub fn ensure(tcx: TyCtxt<'a, $tcx, 'lcx>, key: $K) -> () {
-                tcx.ensure_query::<queries::$name<'_>>(key);
+                tcx.global_tcx().ensure_query::<queries::$name<'_>>(key);
             }
         })*
 
@@ -861,6 +986,7 @@ macro_rules! define_queries_inner {
 
         impl<'a, 'gcx, 'tcx> Deref for TyCtxtAt<'a, 'gcx, 'tcx> {
             type Target = TyCtxt<'a, 'gcx, 'tcx>;
+            #[inline(always)]
             fn deref(&self) -> &Self::Target {
                 &self.tcx
             }
@@ -869,6 +995,7 @@ macro_rules! define_queries_inner {
         impl<'a, $tcx, 'lcx> TyCtxt<'a, $tcx, 'lcx> {
             /// Return a transparent wrapper for `TyCtxt` which uses
             /// `span` as the location of queries performed through it.
+            #[inline(always)]
             pub fn at(self, span: Span) -> TyCtxtAt<'a, $tcx, 'lcx> {
                 TyCtxtAt {
                     tcx: self,
@@ -877,6 +1004,7 @@ macro_rules! define_queries_inner {
             }
 
             $($(#[$attr])*
+            #[inline(always)]
             pub fn $name(self, key: $K) -> $V {
                 self.at(DUMMY_SP).$name(key)
             })*
@@ -884,8 +1012,9 @@ macro_rules! define_queries_inner {
 
         impl<'a, $tcx, 'lcx> TyCtxtAt<'a, $tcx, 'lcx> {
             $($(#[$attr])*
+            #[inline(always)]
             pub fn $name(self, key: $K) -> $V {
-                self.tcx.get_query::<queries::$name<'_>>(self.span, key)
+                self.tcx.global_tcx().get_query::<queries::$name<'_>>(self.span, key)
             })*
         }
 
@@ -1023,20 +1152,11 @@ pub fn force_from_dep_node<'a, 'gcx, 'lcx>(tcx: TyCtxt<'a, 'gcx, 'lcx>,
     macro_rules! force {
         ($query:ident, $key:expr) => {
             {
-                use $crate::util::common::{ProfileQueriesMsg, profq_msg};
-
-                profq_msg!(tcx,
-                    ProfileQueriesMsg::QueryBegin(
-                        DUMMY_SP.data(),
-                        profq_query_msg!(::ty::query::queries::$query::NAME, tcx, $key),
-                    )
+                tcx.global_tcx().force_query::<::ty::query::queries::$query<'_>>(
+                    $key,
+                    DUMMY_SP,
+                    *dep_node
                 );
-
-                if let Err(e) = tcx.force_query::<::ty::query::queries::$query<'_>>(
-                    $key, DUMMY_SP, *dep_node
-                ) {
-                    tcx.report_cycle(e).emit();
-                }
             }
         }
     };
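
A note on `with_diagnostics` in plumbing.rs: the per-job `Lock<Vec<Diagnostic>>`
is gone, replaced by a `Lock<Option<Box<Vec<_>>>>` owned by the caller's stack
frame, so a query that emits no diagnostics never allocates and the
`store_diagnostics*` calls are skipped entirely. The shape of that
capture-only-if-used scheme, sketched with `RefCell` standing in for `Lock`:

    use std::cell::RefCell;

    type Slot = RefCell<Option<Box<Vec<String>>>>;

    fn with_captured<R>(f: impl FnOnce(&Slot) -> R) -> (R, Option<Box<Vec<String>>>) {
        let slot = RefCell::new(None);
        let result = f(&slot);
        (result, slot.into_inner())
    }

    fn main() {
        let (value, diags) = with_captured(|slot| {
            // Only a computation that emits something pays for the allocation.
            slot.borrow_mut()
                .get_or_insert_with(|| Box::new(Vec::new()))
                .push("warning: example".to_string());
            7
        });
        assert_eq!(value, 7);
        assert_eq!(diags.unwrap().len(), 1);
    }
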
diff --git a/src/librustc_data_structures/by_move.rs b/src/librustc_data_structures/by_move.rs
new file mode 100644
index 0000000000000..38c91d61d96f5
--- /dev/null
+++ b/src/librustc_data_structures/by_move.rs
@@ -0,0 +1,47 @@
+
+use std::mem::ManuallyDrop;
+use std::mem::MaybeUninit;
+use std::ops::{Deref, DerefMut};
+
+pub type MoveSlot<'a, T> = Move<'a, MaybeUninit<T>>;
+
+pub struct Move<'a, T>(&'a mut ManuallyDrop<T>);
+
+impl<'a, T> Move<'a, MaybeUninit<T>> {
+    pub fn uninit(ptr: &'a mut ManuallyDrop<MaybeUninit<T>>) -> Self {
+        Move(ptr)
+    }
+
+    // Assumes that MaybeUninit is #[repr(transparent)]
+    pub fn init(&mut self, value: T) -> Move<'a, T> {
+        *self.0 = ManuallyDrop::new(MaybeUninit::new(value));
+        Move(unsafe { &mut *(self.0 as *mut _ as *mut ManuallyDrop<T>) })
+    }
+}
+
+#[macro_export]
+#[allow_internal_unstable]
+macro_rules! uninit_slot {
+    () => (&mut std::mem::ManuallyDrop::new(std::mem::MaybeUninit::uninitialized()))
+}
+
+impl<'a, T> Deref for Move<'a, T> {
+    type Target = T;
+    fn deref(&self) -> &Self::Target {
+        &*self.0
+    }
+}
+
+impl<'a, T> DerefMut for Move<'a, T> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut *self.0
+    }
+}
+
+impl<'a, T> Drop for Move<'a, T> {
+    fn drop(&mut self) {
+        unsafe {
+            ManuallyDrop::drop(&mut self.0)
+        }
+    }
+}
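
Intended usage of the new module, mirroring `JobOwner::try_get`: the caller
reserves uninitialized stack space with `uninit_slot!`, passes it down as a
`MoveSlot`, and the callee's `init` turns it into a `Move<T>` that owns the
value (and runs its destructor) while the storage itself stays in the caller's
frame. A hedged sketch (`Job` and `make_in` are illustrative, not part of the
module):

    use rustc_data_structures::by_move::{Move, MoveSlot};

    struct Job { id: u32 }

    // The callee constructs the value directly into the caller's slot.
    fn make_in<'a>(mut slot: MoveSlot<'a, Job>, id: u32) -> Move<'a, Job> {
        slot.init(Job { id })
    }

    fn caller() {
        let slot = uninit_slot!();                // uninitialized stack storage
        let job = make_in(Move::uninit(slot), 7);
        assert_eq!(job.id, 7);                    // Move derefs to Job
    }                                             // job's Drop runs here
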
diff --git a/src/librustc_data_structures/fingerprint.rs b/src/librustc_data_structures/fingerprint.rs
index aa9ddda2b9364..f8638213b3a2f 100644
--- a/src/librustc_data_structures/fingerprint.rs
+++ b/src/librustc_data_structures/fingerprint.rs
@@ -86,6 +86,7 @@ impl ::std::fmt::Display for Fingerprint {
 }
 
 impl stable_hasher::StableHasherResult for Fingerprint {
+    #[inline]
     fn finish(hasher: stable_hasher::StableHasher<Self>) -> Self {
         let (_0, _1) = hasher.finalize();
         Fingerprint(_0, _1)
diff --git a/src/librustc_data_structures/lib.rs b/src/librustc_data_structures/lib.rs
index 8e0ecb70c6896..c5fbcb68b5dc9 100644
--- a/src/librustc_data_structures/lib.rs
+++ b/src/librustc_data_structures/lib.rs
@@ -30,6 +30,10 @@
 #![feature(allow_internal_unstable)]
 #![feature(vec_resize_with)]
 #![feature(hash_raw_entry)]
+#![feature(stmt_expr_attributes)]
+#![feature(core_intrinsics)]
+#![feature(integer_atomics)]
+#![feature(maybe_uninit)]
 
 #![cfg_attr(unix, feature(libc))]
 #![cfg_attr(test, feature(test))]
@@ -58,6 +62,26 @@ extern crate rustc_cratesio_shim;
 
 pub use rustc_serialize::hex::ToHex;
 
+#[macro_export]
+macro_rules! likely {
+      ($e:expr) => {
+            #[allow(unused_unsafe)]
+            {
+                  unsafe { std::intrinsics::likely($e) }
+            }
+      }
+}
+
+#[macro_export]
+macro_rules! unlikely {
+      ($e:expr) => {
+            #[allow(unused_unsafe)]
+            {
+                  unsafe { std::intrinsics::unlikely($e) }
+            }
+      }
+}
+
 pub mod macros;
 pub mod svh;
 pub mod base_n;
@@ -80,6 +104,8 @@ pub mod sorted_map;
 pub mod sync;
 pub mod tiny_list;
 pub mod thin_vec;
+pub mod vec;
+pub mod by_move;
 pub mod transitive_relation;
 pub use ena::unify;
 pub mod vec_linked_list;
@@ -91,12 +117,14 @@ pub struct OnDrop<F: Fn()>(pub F);
 impl<F: Fn()> OnDrop<F> {
       /// Forgets the function which prevents it from running.
       /// Ensure that the function owns no memory, otherwise it will be leaked.
+      #[inline]
       pub fn disable(self) {
             std::mem::forget(self);
       }
 }
 
 impl<F: Fn()> Drop for OnDrop<F> {
+      #[inline]
       fn drop(&mut self) {
             (self.0)();
       }
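
The `likely!`/`unlikely!` macros wrap the `core_intrinsics` branch-prediction
hints so call sites don't need their own `unsafe` blocks; they evaluate to the
boolean itself and only steer code placement, never behavior. A usage sketch:

    fn checked_div(a: u32, b: u32) -> Option<u32> {
        if unlikely!(b == 0) {
            // Cold branch: codegen moves this out of the fast path.
            None
        } else {
            Some(a / b)
        }
    }
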
diff --git a/src/librustc_data_structures/sync.rs b/src/librustc_data_structures/sync.rs
index 6a4012c81984d..315fdc6f7b2bd 100644
--- a/src/librustc_data_structures/sync.rs
+++ b/src/librustc_data_structures/sync.rs
@@ -20,10 +20,6 @@
 //! It internally uses `parking_lot::RwLock` if cfg!(parallel_queries) is true,
 //! `RefCell` otherwise.
 //!
-//! `LockCell` is a thread safe version of `Cell`, with `set` and `get` operations.
-//! It can never deadlock. It uses `Cell` when
-//! cfg!(parallel_queries) is false, otherwise it is a `Lock`.
-//!
 //! `MTLock` is a mutex which disappears if cfg!(parallel_queries) is false.
 //!
 //! `MTRef` is an immutable reference if cfg!(parallel_queries), and a mutable reference otherwise.
@@ -33,12 +29,9 @@
 
 use std::collections::HashMap;
 use std::hash::{Hash, BuildHasher};
-use std::cmp::Ordering;
 use std::marker::PhantomData;
-use std::fmt::Debug;
-use std::fmt::Formatter;
-use std::fmt;
 use std::ops::{Deref, DerefMut};
+use std::mem::ManuallyDrop;
 use owning_ref::{Erased, OwningRef};
 
 pub fn serial_join<A, B, RA, RB>(oper_a: A, oper_b: B) -> (RA, RB)
@@ -64,6 +57,9 @@ pub fn serial_scope<F, R>(f: F) -> R
     f(&SerialScope)
 }
 
+pub use std::sync::atomic::Ordering::SeqCst;
+pub use std::sync::atomic::Ordering;
+
 cfg_if! {
     if #[cfg(not(parallel_queries))] {
         pub auto trait Send {}
@@ -79,6 +75,62 @@ cfg_if! {
             }
         }
 
+        use std::ops::Add;
+
+        #[derive(Debug)]
+        pub struct Atomic<T: Copy>(Cell<T>);
+
+        impl<T: Copy> Atomic<T> {
+            pub fn new(v: T) -> Self {
+                Atomic(Cell::new(v))
+            }
+        }
+
+        impl<T: Copy + PartialEq> Atomic<T> {
+            pub fn into_inner(self) -> T {
+                self.0.into_inner()
+            }
+
+            pub fn load(&self, _: Ordering) -> T {
+                self.0.get()
+            }
+
+            pub fn store(&self, val: T, _: Ordering) {
+                self.0.set(val)
+            }
+
+            pub fn swap(&self, val: T, _: Ordering) -> T {
+                self.0.replace(val)
+            }
+
+            pub fn compare_exchange(&self,
+                                    current: T,
+                                    new: T,
+                                    _: Ordering,
+                                    _: Ordering)
+                                    -> Result<T, T> {
+                let read = self.0.get();
+                if read == current {
+                    self.0.set(new);
+                    Ok(read)
+                } else {
+                    Err(read)
+                }
+            }
+        }
+
+        impl<T: Add<Output=T> + Copy> Atomic<T> {
+            pub fn fetch_add(&self, val: T, _: Ordering) -> T {
+                let old = self.0.get();
+                self.0.set(old + val);
+                old
+            }
+        }
+
+        pub type AtomicUsize = Atomic<usize>;
+        pub type AtomicBool = Atomic<bool>;
+        pub type AtomicU64 = Atomic<u64>;
+
         pub use self::serial_join as join;
         pub use self::serial_scope as scope;
 
@@ -170,47 +222,6 @@ cfg_if! {
                 MTLock(self.0.clone())
             }
         }
-
-        pub struct LockCell<T>(Cell<T>);
-
-        impl<T> LockCell<T> {
-            #[inline(always)]
-            pub fn new(inner: T) -> Self {
-                LockCell(Cell::new(inner))
-            }
-
-            #[inline(always)]
-            pub fn into_inner(self) -> T {
-                self.0.into_inner()
-            }
-
-            #[inline(always)]
-            pub fn set(&self, new_inner: T) {
-                self.0.set(new_inner);
-            }
-
-            #[inline(always)]
-            pub fn get(&self) -> T where T: Copy {
-                self.0.get()
-            }
-
-            #[inline(always)]
-            pub fn set_mut(&mut self, new_inner: T) {
-                self.0.set(new_inner);
-            }
-
-            #[inline(always)]
-            pub fn get_mut(&mut self) -> T where T: Copy {
-                self.0.get()
-            }
-        }
-
-        impl<T> LockCell<Option<T>> {
-            #[inline(always)]
-            pub fn take(&self) -> Option<T> {
-                unsafe { (*self.0.as_ptr()).take() }
-            }
-        }
     } else {
         pub use std::marker::Send as Send;
         pub use std::marker::Sync as Sync;
@@ -223,6 +234,8 @@ cfg_if! {
         pub use parking_lot::MutexGuard as LockGuard;
         pub use parking_lot::MappedMutexGuard as MappedLockGuard;
 
+        pub use std::sync::atomic::{AtomicBool, AtomicUsize, AtomicU64};
+
         pub use std::sync::Arc as Lrc;
         pub use std::sync::Weak as Weak;
 
@@ -288,47 +301,6 @@ cfg_if! {
                 v.erase_send_sync_owner()
             }}
         }
-
-        pub struct LockCell<T>(Lock<T>);
-
-        impl<T> LockCell<T> {
-            #[inline(always)]
-            pub fn new(inner: T) -> Self {
-                LockCell(Lock::new(inner))
-            }
-
-            #[inline(always)]
-            pub fn into_inner(self) -> T {
-                self.0.into_inner()
-            }
-
-            #[inline(always)]
-            pub fn set(&self, new_inner: T) {
-                *self.0.lock() = new_inner;
-            }
-
-            #[inline(always)]
-            pub fn get(&self) -> T where T: Copy {
-                *self.0.lock()
-            }
-
-            #[inline(always)]
-            pub fn set_mut(&mut self, new_inner: T) {
-                *self.0.get_mut() = new_inner;
-            }
-
-            #[inline(always)]
-            pub fn get_mut(&mut self) -> T where T: Copy {
-                *self.0.get_mut()
-            }
-        }
-
-        impl<T> LockCell<Option<T>> {
-            #[inline(always)]
-            pub fn take(&self) -> Option<T> {
-                self.0.lock().take()
-            }
-        }
     }
 }
 
@@ -476,65 +448,6 @@ impl<T> Once<T> {
     }
 }
 
-impl<T: Copy + Debug> Debug for LockCell<T> {
-    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
-        f.debug_struct("LockCell")
-            .field("value", &self.get())
-            .finish()
-    }
-}
-
-impl<T:Default> Default for LockCell<T> {
-    /// Creates a `LockCell<T>`, with the `Default` value for T.
-    #[inline]
-    fn default() -> LockCell<T> {
-        LockCell::new(Default::default())
-    }
-}
-
-impl<T:PartialEq + Copy> PartialEq for LockCell<T> {
-    #[inline]
-    fn eq(&self, other: &LockCell<T>) -> bool {
-        self.get() == other.get()
-    }
-}
-
-impl<T:Eq + Copy> Eq for LockCell<T> {}
-
-impl<T:PartialOrd + Copy> PartialOrd for LockCell<T> {
-    #[inline]
-    fn partial_cmp(&self, other: &LockCell<T>) -> Option<Ordering> {
-        self.get().partial_cmp(&other.get())
-    }
-
-    #[inline]
-    fn lt(&self, other: &LockCell<T>) -> bool {
-        self.get() < other.get()
-    }
-
-    #[inline]
-    fn le(&self, other: &LockCell<T>) -> bool {
-        self.get() <= other.get()
-    }
-
-    #[inline]
-    fn gt(&self, other: &LockCell<T>) -> bool {
-        self.get() > other.get()
-    }
-
-    #[inline]
-    fn ge(&self, other: &LockCell<T>) -> bool {
-        self.get() >= other.get()
-    }
-}
-
-impl<T:Ord + Copy> Ord for LockCell<T> {
-    #[inline]
-    fn cmp(&self, other: &LockCell<T>) -> Ordering {
-        self.get().cmp(&other.get())
-    }
-}
-
 #[derive(Debug)]
 pub struct Lock<T>(InnerLock<T>);
 
@@ -757,3 +670,40 @@ impl<T> DerefMut for OneThread<T> {
         &mut self.inner
     }
 }
+
+pub struct LrcRef<'a, T: ?Sized>(&'a T);
+
+impl<'a, T: 'a + ?Sized> Clone for LrcRef<'a, T> {
+    fn clone(&self) -> Self {
+        LrcRef(self.0)
+    }
+}
+impl<'a, T: 'a + ?Sized> Copy for LrcRef<'a, T> {}
+
+impl<'a, T: 'a + ?Sized> LrcRef<'a, T> {
+    #[inline]
+    pub fn new(lrc: &Lrc<T>) -> LrcRef<'_, T> {
+        LrcRef(&*lrc)
+    }
+
+    #[inline]
+    pub fn into(self) -> Lrc<T> {
+        unsafe {
+            // We know that we have a reference to an Lrc here.
+            // Pretend to take ownership of the Lrc with from_raw
+            // and then clone a new one.
+            // We use ManuallyDrop to ensure the reference count
+            // isn't decreased.
+            let lrc = ManuallyDrop::new(Lrc::from_raw(self.0));
+            (*lrc).clone()
+        }
+    }
+}
+
+impl<'a, T: ?Sized> Deref for LrcRef<'a, T> {
+    type Target = T;
+
+    fn deref(&self) -> &T {
+        self.0
+    }
+}
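
`LrcRef` is what lets the ImplicitCtxt reference the current query job without
touching the reference count on every query entry: it is `Copy`, derefs like
`&T`, and only `LrcRef::into` pays for a real clone of the `Lrc`. A usage
sketch:

    use rustc_data_structures::sync::{Lrc, LrcRef};

    fn example() {
        let job = Lrc::new(42u32);

        // Free to create and copy: no reference-count traffic.
        let r = LrcRef::new(&job);
        let r2 = r;

        assert_eq!(*r2, 42); // derefs like &u32

        // Only when the reference must outlive the borrow do we
        // pay for an actual clone of the Lrc.
        let owned: Lrc<u32> = r2.into();
        assert_eq!(Lrc::strong_count(&owned), 2);
    }
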
diff --git a/src/librustc_data_structures/vec.rs b/src/librustc_data_structures/vec.rs
new file mode 100644
index 0000000000000..dd6830c7eec2d
--- /dev/null
+++ b/src/librustc_data_structures/vec.rs
@@ -0,0 +1,48 @@
+use smallvec::{SmallVec, Array};
+use std::ptr;
+
+pub trait SmallVecExt<A: Array> {
+    fn push_light(&mut self, v: A::Item);
+}
+
+#[inline(never)]
+#[cold]
+fn push_light_cold<A: Array>(vec: &mut SmallVec<A>, v: A::Item) {
+    if likely!(vec.spilled()) {
+        let len = vec.len();
+        if likely!(len < vec.capacity()) {
+            unsafe {
+                ptr::write(vec.as_mut_ptr().offset(len as isize), v);
+                vec.set_len(len + 1);
+            }
+        } else {
+            vec.push(v)
+        }
+    } else {
+        unsafe {
+            std::intrinsics::assume(vec.capacity() == A::size());
+        }
+        vec.push(v)
+    }
+}
+
+impl<A: Array> SmallVecExt<A> for SmallVec<A> {
+    #[inline(always)]
+    fn push_light(&mut self, v: A::Item) {
+        let (free, len) = if !self.spilled() {
+            let len = self.len();
+            (len < A::size(), len)
+        } else {
+            (false, 0)
+        };
+        if likely!(free) {
+            unsafe {
+                ptr::write(self.as_mut_ptr().offset(len as isize), v);
+                std::intrinsics::assume(!self.spilled());
+                self.set_len(self.len() + 1);
+            }
+        } else {
+            push_light_cold(self, v);
+        }
+    }
+}
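
`push_light` keeps `SmallVec` pushes cheap on the hot path: when the buffer is
inline and has spare capacity the push is a single `ptr::write` plus a length
bump, and every other case is routed to the outlined, `#[cold]`
`push_light_cold`. Call sites use it exactly like `push` (sketch):

    use rustc_data_structures::vec::SmallVecExt;
    use smallvec::SmallVec;

    fn record_edges(edges: &mut SmallVec<[u32; 8]>, new_edges: &[u32]) {
        for &e in new_edges {
            // Same semantics as edges.push(e); the spill path stays
            // out of this function's instruction stream.
            edges.push_light(e);
        }
    }
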
diff --git a/src/librustc_driver/lib.rs b/src/librustc_driver/lib.rs
index 45d107f13a0fd..0ebfc623ab515 100644
--- a/src/librustc_driver/lib.rs
+++ b/src/librustc_driver/lib.rs
@@ -27,6 +27,7 @@
 #![feature(set_stdio)]
 #![feature(rustc_stack_internals)]
 #![feature(no_debug)]
+#![feature(integer_atomics)]
 
 #![recursion_limit="256"]
 
@@ -78,7 +79,7 @@ use pretty::{PpMode, UserIdentifiedItem};
 use rustc_resolve as resolve;
 use rustc_save_analysis as save;
 use rustc_save_analysis::DumpHandler;
-use rustc_data_structures::sync::{self, Lrc};
+use rustc_data_structures::sync::{self, Lrc, Ordering::SeqCst};
 use rustc_data_structures::OnDrop;
 use rustc::session::{self, config, Session, build_session, CompileResult};
 use rustc::session::CompileIncomplete;
@@ -954,7 +955,7 @@ impl<'a> CompilerCalls<'a> for RustcDefaultCalls {
                 let sess = state.session;
                 eprintln!("Fuel used by {}: {}",
                     sess.print_fuel_crate.as_ref().unwrap(),
-                    sess.print_fuel.get());
+                    sess.print_fuel.load(SeqCst));
             }
         }
         control
diff --git a/src/librustc_errors/lib.rs b/src/librustc_errors/lib.rs
index b6528cbe2c810..59fa78a85f633 100644
--- a/src/librustc_errors/lib.rs
+++ b/src/librustc_errors/lib.rs
@@ -36,15 +36,13 @@ use self::Level::*;
 
 use emitter::{Emitter, EmitterWriter};
 
-use rustc_data_structures::sync::{self, Lrc, Lock, LockCell};
+use rustc_data_structures::sync::{self, Lrc, Lock, AtomicUsize, AtomicBool, SeqCst};
 use rustc_data_structures::fx::FxHashSet;
 use rustc_data_structures::stable_hasher::StableHasher;
 
 use std::borrow::Cow;
 use std::cell::Cell;
 use std::{error, fmt};
-use std::sync::atomic::AtomicUsize;
-use std::sync::atomic::Ordering::SeqCst;
 use std::panic;
 
 use termcolor::{ColorSpec, Color};
@@ -281,7 +279,7 @@ pub struct Handler {
 
     err_count: AtomicUsize,
     emitter: Lock<Box<dyn Emitter + sync::Send>>,
-    continue_after_error: LockCell<bool>,
+    continue_after_error: AtomicBool,
     delayed_span_bugs: Lock<Vec<Diagnostic>>,
 
     // This set contains the `DiagnosticId` of all emitted diagnostics to avoid
@@ -380,7 +378,7 @@ impl Handler {
             flags,
             err_count: AtomicUsize::new(0),
             emitter: Lock::new(e),
-            continue_after_error: LockCell::new(true),
+            continue_after_error: AtomicBool::new(true),
             delayed_span_bugs: Lock::new(Vec::new()),
             taught_diagnostics: Default::default(),
             emitted_diagnostic_codes: Default::default(),
@@ -389,7 +387,7 @@ impl Handler {
     }
 
     pub fn set_continue_after_error(&self, continue_after_error: bool) {
-        self.continue_after_error.set(continue_after_error);
+        self.continue_after_error.store(continue_after_error, SeqCst);
     }
 
     /// Resets the diagnostic error count as well as the cached emitted diagnostics.
@@ -668,7 +666,7 @@ impl Handler {
         let mut db = DiagnosticBuilder::new(self, lvl, msg);
         db.set_span(msp.clone());
         db.emit();
-        if !self.continue_after_error.get() {
+        if !self.continue_after_error.load(SeqCst) {
             self.abort_if_errors();
         }
     }
@@ -679,7 +677,7 @@ impl Handler {
         let mut db = DiagnosticBuilder::new_with_code(self, lvl, Some(code), msg);
         db.set_span(msp.clone());
         db.emit();
-        if !self.continue_after_error.get() {
+        if !self.continue_after_error.load(SeqCst) {
             self.abort_if_errors();
         }
     }
diff --git a/src/librustc_mir/monomorphize/partitioning.rs b/src/librustc_mir/monomorphize/partitioning.rs
index 528942df29fee..ab3087d99e954 100644
--- a/src/librustc_mir/monomorphize/partitioning.rs
+++ b/src/librustc_mir/monomorphize/partitioning.rs
@@ -212,7 +212,7 @@ pub trait CodegenUnitExt<'tcx> {
     }
 
     fn codegen_dep_node(&self, tcx: TyCtxt<'_, 'tcx, 'tcx>) -> DepNode {
-        DepNode::new(tcx, DepConstructor::CompileCodegenUnit(self.name().clone()))
+        DepNode::new_inlined(tcx, DepConstructor::CompileCodegenUnit(self.name().clone()))
     }
 }
 
diff --git a/src/libserialize/opaque.rs b/src/libserialize/opaque.rs
index 4ce80bc36a080..3eb7bc14a927f 100644
--- a/src/libserialize/opaque.rs
+++ b/src/libserialize/opaque.rs
@@ -172,6 +172,7 @@ pub struct Decoder<'a> {
 }
 
 impl<'a> Decoder<'a> {
+    #[inline]
     pub fn new(data: &'a [u8], position: usize) -> Decoder<'a> {
         Decoder {
             data,
diff --git a/src/libstd/collections/hash/table.rs b/src/libstd/collections/hash/table.rs
index 479e6dccb90dd..7195175db28a6 100644
--- a/src/libstd/collections/hash/table.rs
+++ b/src/libstd/collections/hash/table.rs
@@ -740,6 +740,7 @@ impl<K, V> RawTable<K, V> {
         }
     }
 
+    #[inline]
     fn new_internal(
         capacity: usize,
         fallibility: Fallibility,
@@ -755,12 +756,14 @@ impl<K, V> RawTable<K, V> {
 
     /// Tries to create a new raw table from a given capacity. If it cannot allocate,
     /// it returns with AllocErr.
+    #[inline]
     pub fn try_new(capacity: usize) -> Result<RawTable<K, V>, CollectionAllocErr> {
         Self::new_internal(capacity, Fallible)
     }
 
     /// Creates a new raw table from a given capacity. All buckets are
     /// initially empty.
+    #[inline]
     pub fn new(capacity: usize) -> RawTable<K, V> {
         match Self::new_internal(capacity, Infallible) {
             Err(CollectionAllocErr::CapacityOverflow) => panic!("capacity overflow"),
diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs
index 866dba24dcb0a..f8fafaf701d3b 100644
--- a/src/libsyntax/parse/mod.rs
+++ b/src/libsyntax/parse/mod.rs
@@ -81,6 +81,7 @@ impl ParseSess {
         }
     }
 
+    #[inline]
     pub fn source_map(&self) -> &SourceMap {
         &self.source_map
     }