Skip to content

Commit cbfdb3f

Browse files
timholy, vchuravy, and vtjnash
committed
Replace the .ji serialization with sysimage format
This unifies two serializers, `dump.c` (used for packages) and `staticdata.c` (used for system images). It adopts the `staticdata` strategy, adding support for external linkage, uniquing of MethodInstances & types, method extensions, external specializations, and invalidation. This lays the groundwork for native code caching as done with system images. Co-authored-by: Valentin Churavy <[email protected]> Co-authored-by: Jameson Nash <[email protected]> Co-authored-by: Tim Holy <[email protected]>
1 parent 54a9c2f commit cbfdb3f

39 files changed

+3485
-4433
lines changed

base/compiler/typeinfer.jl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
# This file is a part of Julia. License is MIT: https://julialang.org/license
22

3-
# Tracking of newly-inferred MethodInstances during precompilation
3+
# Tracking of newly-inferred CodeInstances during precompilation
44
const track_newly_inferred = RefValue{Bool}(false)
5-
const newly_inferred = MethodInstance[]
5+
const newly_inferred = CodeInstance[]
66

77
# build (and start inferring) the inference frame for the top-level MethodInstance
88
function typeinf(interp::AbstractInterpreter, result::InferenceResult, cache::Symbol)
@@ -403,11 +403,11 @@ function cache_result!(interp::AbstractInterpreter, result::InferenceResult)
403403
# TODO: also don't store inferred code if we've previously decided to interpret this function
404404
if !already_inferred
405405
inferred_result = transform_result_for_cache(interp, linfo, valid_worlds, result)
406-
code_cache(interp)[linfo] = CodeInstance(result, inferred_result, valid_worlds)
406+
code_cache(interp)[linfo] = ci = CodeInstance(result, inferred_result, valid_worlds)
407407
if track_newly_inferred[]
408408
m = linfo.def
409409
if isa(m, Method) && m.module != Core
410-
ccall(:jl_push_newly_inferred, Cvoid, (Any,), linfo)
410+
ccall(:jl_push_newly_inferred, Cvoid, (Any,), ci)
411411
end
412412
end
413413
end

base/loading.jl

Lines changed: 48 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -898,7 +898,7 @@ function _include_from_serialized(pkg::PkgId, path::String, depmods::Vector{Any}
898898
end
899899

900900
@debug "Loading cache file $path for $pkg"
901-
sv = ccall(:jl_restore_incremental, Any, (Cstring, Any), path, depmods)
901+
sv = ccall(:jl_restore_incremental, Any, (Cstring, Any, Cint), path, depmods, false)
902902
if isa(sv, Exception)
903903
return sv
904904
end
@@ -973,7 +973,7 @@ function run_package_callbacks(modkey::PkgId)
973973
end
974974

975975
# loads a precompile cache file, after checking stale_cachefile tests
976-
function _tryrequire_from_serialized(modkey::PkgId, build_id::UInt64)
976+
function _tryrequire_from_serialized(modkey::PkgId, build_id::UInt128)
977977
assert_havelock(require_lock)
978978
loaded = nothing
979979
if root_module_exists(modkey)
@@ -1021,7 +1021,7 @@ function _tryrequire_from_serialized(modkey::PkgId, path::String, sourcepath::St
10211021
for i in 1:length(depmods)
10221022
dep = depmods[i]
10231023
dep isa Module && continue
1024-
_, depkey, depbuild_id = dep::Tuple{String, PkgId, UInt64}
1024+
_, depkey, depbuild_id = dep::Tuple{String, PkgId, UInt128}
10251025
@assert root_module_exists(depkey)
10261026
dep = root_module(depkey)
10271027
depmods[i] = dep
@@ -1052,7 +1052,7 @@ function _tryrequire_from_serialized(pkg::PkgId, path::String)
10521052
local depmodnames
10531053
io = open(path, "r")
10541054
try
1055-
isvalid_cache_header(io) || return ArgumentError("Invalid header in cache file $path.")
1055+
iszero(isvalid_cache_header(io)) && return ArgumentError("Invalid header in cache file $path.")
10561056
depmodnames = parse_cache_header(io)[3]
10571057
isvalid_file_crc(io) || return ArgumentError("Invalid checksum in cache file $path.")
10581058
finally
@@ -1074,7 +1074,7 @@ end
10741074

10751075
# returns `nothing` if require found a precompile cache for this sourcepath, but couldn't load it
10761076
# returns the set of modules restored if the cache load succeeded
1077-
@constprop :none function _require_search_from_serialized(pkg::PkgId, sourcepath::String, build_id::UInt64)
1077+
@constprop :none function _require_search_from_serialized(pkg::PkgId, sourcepath::String, build_id::UInt128)
10781078
assert_havelock(require_lock)
10791079
paths = find_all_in_cache_path(pkg)
10801080
for path_to_try in paths::Vector{String}
@@ -1087,7 +1087,7 @@ end
10871087
for i in 1:length(staledeps)
10881088
dep = staledeps[i]
10891089
dep isa Module && continue
1090-
modpath, modkey, modbuild_id = dep::Tuple{String, PkgId, UInt64}
1090+
modpath, modkey, modbuild_id = dep::Tuple{String, PkgId, UInt128}
10911091
modpaths = find_all_in_cache_path(modkey)
10921092
modfound = false
10931093
for modpath_to_try in modpaths::Vector{String}
@@ -1101,7 +1101,7 @@ end
11011101
break
11021102
end
11031103
if !modfound
1104-
@debug "Rejecting cache file $path_to_try because required dependency $modkey with build ID $modbuild_id is missing from the cache."
1104+
@debug "Rejecting cache file $path_to_try because required dependency $modkey with build ID $(UUID(modbuild_id)) is missing from the cache."
11051105
staledeps = true
11061106
break
11071107
end
@@ -1153,7 +1153,7 @@ const package_callbacks = Any[]
11531153
const include_callbacks = Any[]
11541154

11551155
# used to optionally track dependencies when requiring a module:
1156-
const _concrete_dependencies = Pair{PkgId,UInt64}[] # these dependency versions are "set in stone", and the process should try to avoid invalidating them
1156+
const _concrete_dependencies = Pair{PkgId,UInt128}[] # these dependency versions are "set in stone", and the process should try to avoid invalidating them
11571157
const _require_dependencies = Any[] # a list of (mod, path, mtime) tuples that are the file dependencies of the module currently being precompiled
11581158
const _track_dependencies = Ref(false) # set this to true to track the list of file dependencies
11591159
function _include_dependency(mod::Module, _path::AbstractString)
@@ -1406,7 +1406,7 @@ function _require(pkg::PkgId, env=nothing)
14061406

14071407
# attempt to load the module file via the precompile cache locations
14081408
if JLOptions().use_compiled_modules != 0
1409-
m = _require_search_from_serialized(pkg, path, UInt64(0))
1409+
m = _require_search_from_serialized(pkg, path, UInt128(0))
14101410
if m isa Module
14111411
return m
14121412
end
@@ -1416,7 +1416,7 @@ function _require(pkg::PkgId, env=nothing)
14161416
# but it was not handled by the precompile loader, complain
14171417
for (concrete_pkg, concrete_build_id) in _concrete_dependencies
14181418
if pkg == concrete_pkg
1419-
@warn """Module $(pkg.name) with build ID $concrete_build_id is missing from the cache.
1419+
@warn """Module $(pkg.name) with build ID $((UUID(concrete_build_id))) is missing from the cache.
14201420
This may mean $pkg does not support precompilation but is imported by a module that does."""
14211421
if JLOptions().incremental != 0
14221422
# during incremental precompilation, this should be fail-fast
@@ -1785,9 +1785,13 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in
17851785
close(tmpio)
17861786
p = create_expr_cache(pkg, path, tmppath, concrete_deps, internal_stderr, internal_stdout)
17871787
if success(p)
1788-
# append checksum to the end of the .ji file:
1789-
open(tmppath, "a+") do f
1790-
write(f, _crc32c(seekstart(f)))
1788+
# append extra crc to the end of the .ji file:
1789+
open(tmppath, "r+") do f
1790+
if iszero(isvalid_cache_header(f))
1791+
error("Invalid header for $pkg in new cache file $(repr(tmppath)).")
1792+
end
1793+
seekstart(f)
1794+
write(f, _crc32c(f))
17911795
end
17921796
# inherit permission from the source file (and make them writable)
17931797
chmod(tmppath, filemode(path) & 0o777 | 0o200)
@@ -1807,7 +1811,7 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in
18071811
end
18081812
end
18091813

1810-
# this is atomic according to POSIX:
1814+
# this is atomic according to POSIX (not Win32):
18111815
rename(tmppath, cachefile; force=true)
18121816
return cachefile
18131817
end
@@ -1817,13 +1821,16 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in
18171821
if p.exitcode == 125
18181822
return PrecompilableError()
18191823
else
1820-
error("Failed to precompile $pkg to $tmppath.")
1824+
error("Failed to precompile $pkg to $(repr(tmppath)).")
18211825
end
18221826
end
18231827

1824-
module_build_id(m::Module) = ccall(:jl_module_build_id, UInt64, (Any,), m)
1828+
function module_build_id(m::Module)
1829+
hi, lo = ccall(:jl_module_build_id, NTuple{2,UInt64}, (Any,), m)
1830+
return (UInt128(hi) << 64) | lo
1831+
end
18251832

1826-
isvalid_cache_header(f::IOStream) = (0 != ccall(:jl_read_verify_header, Cint, (Ptr{Cvoid},), f.ios))
1833+
isvalid_cache_header(f::IOStream) = ccall(:jl_read_verify_header, UInt64, (Ptr{Cvoid},), f.ios) # returns checksum id or zero
18271834
isvalid_file_crc(f::IOStream) = (_crc32c(seekstart(f), filesize(f) - 4) == read(f, UInt32))
18281835

18291836
struct CacheHeaderIncludes
@@ -1897,13 +1904,14 @@ function parse_cache_header(f::IO)
18971904
totbytes -= 8
18981905
@assert totbytes == 0 "header of cache file appears to be corrupt (totbytes == $(totbytes))"
18991906
# read the list of modules that are required to be present during loading
1900-
required_modules = Vector{Pair{PkgId, UInt64}}()
1907+
required_modules = Vector{Pair{PkgId, UInt128}}()
19011908
while true
19021909
n = read(f, Int32)
19031910
n == 0 && break
19041911
sym = String(read(f, n)) # module name
19051912
uuid = UUID((read(f, UInt64), read(f, UInt64))) # pkg UUID
1906-
build_id = read(f, UInt64) # build id
1913+
build_id = UInt128(read(f, UInt64)) << 64
1914+
build_id |= read(f, UInt64)
19071915
push!(required_modules, PkgId(uuid, sym) => build_id)
19081916
end
19091917
return modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash
@@ -1912,29 +1920,29 @@ end
19121920
function parse_cache_header(cachefile::String; srcfiles_only::Bool=false)
19131921
io = open(cachefile, "r")
19141922
try
1915-
!isvalid_cache_header(io) && throw(ArgumentError("Invalid header in cache file $cachefile."))
1923+
iszero(isvalid_cache_header(io)) && throw(ArgumentError("Invalid header in cache file $cachefile."))
19161924
ret = parse_cache_header(io)
19171925
srcfiles_only || return ret
1918-
modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash = ret
1926+
_, (includes, _), _, srctextpos, _... = ret
19191927
srcfiles = srctext_files(io, srctextpos)
19201928
delidx = Int[]
19211929
for (i, chi) in enumerate(includes)
19221930
chi.filename ∈ srcfiles || push!(delidx, i)
19231931
end
19241932
deleteat!(includes, delidx)
1925-
return modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash
1933+
return ret
19261934
finally
19271935
close(io)
19281936
end
19291937
end
19301938

19311939

19321940

1933-
preferences_hash(f::IO) = parse_cache_header(f)[end]
1941+
preferences_hash(f::IO) = parse_cache_header(f)[6]
19341942
function preferences_hash(cachefile::String)
19351943
io = open(cachefile, "r")
19361944
try
1937-
if !isvalid_cache_header(io)
1945+
if iszero(isvalid_cache_header(io))
19381946
throw(ArgumentError("Invalid header in cache file $cachefile."))
19391947
end
19401948
return preferences_hash(io)
@@ -1945,22 +1953,22 @@ end
19451953

19461954

19471955
function cache_dependencies(f::IO)
1948-
defs, (includes, requires), modules, srctextpos, prefs, prefs_hash = parse_cache_header(f)
1956+
_, (includes, _), modules, _... = parse_cache_header(f)
19491957
return modules, map(chi -> (chi.filename, chi.mtime), includes) # return just filename and mtime
19501958
end
19511959

19521960
function cache_dependencies(cachefile::String)
19531961
io = open(cachefile, "r")
19541962
try
1955-
!isvalid_cache_header(io) && throw(ArgumentError("Invalid header in cache file $cachefile."))
1963+
iszero(isvalid_cache_header(io)) && throw(ArgumentError("Invalid header in cache file $cachefile."))
19561964
return cache_dependencies(io)
19571965
finally
19581966
close(io)
19591967
end
19601968
end
19611969

19621970
function read_dependency_src(io::IO, filename::AbstractString)
1963-
modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash = parse_cache_header(io)
1971+
srctextpos = parse_cache_header(io)[4]
19641972
srctextpos == 0 && error("no source-text stored in cache file")
19651973
seek(io, srctextpos)
19661974
return _read_dependency_src(io, filename)
@@ -1983,7 +1991,7 @@ end
19831991
function read_dependency_src(cachefile::String, filename::AbstractString)
19841992
io = open(cachefile, "r")
19851993
try
1986-
!isvalid_cache_header(io) && throw(ArgumentError("Invalid header in cache file $cachefile."))
1994+
iszero(isvalid_cache_header(io)) && throw(ArgumentError("Invalid header in cache file $cachefile."))
19871995
return read_dependency_src(io, filename)
19881996
finally
19891997
close(io)
@@ -2173,12 +2181,13 @@ get_compiletime_preferences(::Nothing) = String[]
21732181
# returns true if it "cachefile.ji" is stale relative to "modpath.jl" and build_id for modkey
21742182
# otherwise returns the list of dependencies to also check
21752183
@constprop :none function stale_cachefile(modpath::String, cachefile::String; ignore_loaded::Bool = false)
2176-
return stale_cachefile(PkgId(""), UInt64(0), modpath, cachefile; ignore_loaded)
2184+
return stale_cachefile(PkgId(""), UInt128(0), modpath, cachefile; ignore_loaded)
21772185
end
2178-
@constprop :none function stale_cachefile(modkey::PkgId, build_id::UInt64, modpath::String, cachefile::String; ignore_loaded::Bool = false)
2186+
@constprop :none function stale_cachefile(modkey::PkgId, build_id::UInt128, modpath::String, cachefile::String; ignore_loaded::Bool = false)
21792187
io = open(cachefile, "r")
21802188
try
2181-
if !isvalid_cache_header(io)
2189+
checksum = isvalid_cache_header(io)
2190+
if iszero(checksum)
21822191
@debug "Rejecting cache file $cachefile due to it containing an invalid cache header"
21832192
return true # invalid cache file
21842193
end
@@ -2191,9 +2200,12 @@ end
21912200
@debug "Rejecting cache file $cachefile for $modkey since it is for $id instead"
21922201
return true
21932202
end
2194-
if build_id != UInt64(0) && id.second != build_id
2195-
@debug "Ignoring cache file $cachefile for $modkey since it is does not provide desired build_id"
2196-
return true
2203+
if build_id != UInt128(0)
2204+
id_build = (UInt128(checksum) << 64) | id.second
2205+
if id_build != build_id
2206+
@debug "Ignoring cache file $cachefile for $modkey ($((UUID(id_build)))) since it is does not provide desired build_id ($((UUID(build_id))))"
2207+
return true
2208+
end
21972209
end
21982210
id = id.first
21992211
modules = Dict{PkgId, UInt64}(modules)
@@ -2233,11 +2245,12 @@ end
22332245
for (req_key, req_build_id) in _concrete_dependencies
22342246
build_id = get(modules, req_key, UInt64(0))
22352247
if build_id !== UInt64(0)
2248+
build_id |= UInt128(checksum) << 64
22362249
if build_id === req_build_id
22372250
skip_timecheck = true
22382251
break
22392252
end
2240-
@debug "Rejecting cache file $cachefile because it provides the wrong build_id (got $build_id) for $req_key (want $req_build_id)"
2253+
@debug "Rejecting cache file $cachefile because it provides the wrong build_id (got $((UUID(build_id)))) for $req_key (want $(UUID(req_build_id)))"
22412254
return true # cachefile doesn't provide the required version of the dependency
22422255
end
22432256
end

deps/llvm.mk

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -308,8 +308,8 @@ LLVM_TOOLS_JLL_TAGS := -llvm_version+$(LLVM_VER_MAJ)
308308
endif
309309

310310
$(eval $(call bb-install,llvm,LLVM,false,true))
311-
$(eval $(call bb-install,clang,CLANG,false,true))
312311
$(eval $(call bb-install,lld,LLD,false,true))
312+
$(eval $(call bb-install,clang,CLANG,false,true))
313313
$(eval $(call bb-install,llvm-tools,LLVM_TOOLS,false,true))
314314

315315
endif # USE_BINARYBUILDER_LLVM

src/Makefile

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ endif
4242

4343
SRCS := \
4444
jltypes gf typemap smallintset ast builtins module interpreter symbol \
45-
dlload sys init task array dump staticdata toplevel jl_uv datatype \
45+
dlload sys init task array staticdata toplevel jl_uv datatype \
4646
simplevector runtime_intrinsics precompile jloptions \
4747
threading partr stackwalk gc gc-debug gc-pages gc-stacks gc-alloc-profiler method \
4848
jlapi signal-handling safepoint timing subtype rtutils gc-heap-snapshot \
@@ -291,7 +291,6 @@ $(BUILDDIR)/codegen.o $(BUILDDIR)/codegen.dbg.obj: $(addprefix $(SRCDIR)/,\
291291
$(BUILDDIR)/datatype.o $(BUILDDIR)/datatype.dbg.obj: $(SRCDIR)/support/htable.h $(SRCDIR)/support/htable.inc
292292
$(BUILDDIR)/debuginfo.o $(BUILDDIR)/debuginfo.dbg.obj: $(addprefix $(SRCDIR)/,debuginfo.h processor.h jitlayers.h debug-registry.h)
293293
$(BUILDDIR)/disasm.o $(BUILDDIR)/disasm.dbg.obj: $(SRCDIR)/debuginfo.h $(SRCDIR)/processor.h
294-
$(BUILDDIR)/dump.o $(BUILDDIR)/dump.dbg.obj: $(addprefix $(SRCDIR)/,common_symbols1.inc common_symbols2.inc builtin_proto.h serialize.h)
295294
$(BUILDDIR)/gc-debug.o $(BUILDDIR)/gc-debug.dbg.obj: $(SRCDIR)/gc.h
296295
$(BUILDDIR)/gc-pages.o $(BUILDDIR)/gc-pages.dbg.obj: $(SRCDIR)/gc.h
297296
$(BUILDDIR)/gc.o $(BUILDDIR)/gc.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-heap-snapshot.h $(SRCDIR)/gc-alloc-profiler.h
@@ -317,7 +316,7 @@ $(BUILDDIR)/llvm-remove-addrspaces.o $(BUILDDIR)/llvm-remove-addrspaces.dbg.obj:
317316
$(BUILDDIR)/llvm-ptls.o $(BUILDDIR)/llvm-ptls.dbg.obj: $(SRCDIR)/codegen_shared.h
318317
$(BUILDDIR)/processor.o $(BUILDDIR)/processor.dbg.obj: $(addprefix $(SRCDIR)/,processor_*.cpp processor.h features_*.h)
319318
$(BUILDDIR)/signal-handling.o $(BUILDDIR)/signal-handling.dbg.obj: $(addprefix $(SRCDIR)/,signals-*.c)
320-
$(BUILDDIR)/staticdata.o $(BUILDDIR)/staticdata.dbg.obj: $(SRCDIR)/processor.h $(SRCDIR)/builtin_proto.h
319+
$(BUILDDIR)/staticdata.o $(BUILDDIR)/staticdata.dbg.obj: $(SRCDIR)/staticdata_utils.c $(SRCDIR)/processor.h $(SRCDIR)/builtin_proto.h
321320
$(BUILDDIR)/toplevel.o $(BUILDDIR)/toplevel.dbg.obj: $(SRCDIR)/builtin_proto.h
322321
$(BUILDDIR)/ircode.o $(BUILDDIR)/ircode.dbg.obj: $(SRCDIR)/serialize.h
323322
$(BUILDDIR)/pipeline.o $(BUILDDIR)/pipeline.dbg.obj: $(SRCDIR)/passes.h $(SRCDIR)/jitlayers.h
@@ -453,7 +452,7 @@ SA_EXCEPTIONS-jloptions.c := -Xanalyzer -analyzer-config -Xana
453452
SA_EXCEPTIONS-subtype.c := -Xanalyzer -analyzer-config -Xanalyzer silence-checkers="core.uninitialized.Assign;core.UndefinedBinaryOperatorResult"
454453
SA_EXCEPTIONS-codegen.c := -Xanalyzer -analyzer-config -Xanalyzer silence-checkers="core"
455454
# these need to be annotated (and possibly fixed)
456-
SKIP_IMPLICIT_ATOMICS := dump.c module.c staticdata.c codegen.cpp
455+
SKIP_IMPLICIT_ATOMICS := module.c staticdata.c codegen.cpp
457456
# these need to be annotated (and possibly fixed)
458457
SKIP_GC_CHECK := codegen.cpp rtutils.c
459458

0 commit comments

Comments
 (0)