-
Notifications
You must be signed in to change notification settings - Fork 567
WIP: Introduce Ftrace in the syscall section #180
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
f7cc340
9b7be18
6626ce8
7486076
b44bf10
2df343f
7f5cf1f
5a71f73
3b0cc7d
8cf0d6d
beb9272
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,227 @@ | ||
/** | ||
* syscall-ftrace.c | ||
* | ||
* System call "stealing" with ftrace | ||
* | ||
* We create a callback function that contains | ||
* an unconditional jump to our spying function, | ||
* which will then return control to the original one. | ||
* | ||
* The callback function is triggered by ftrace. | ||
*/ | ||
|
||
#include <linux/kernel.h> | ||
#include <linux/init.h> | ||
#include <linux/module.h> | ||
#include <linux/moduleparam.h> | ||
#include <linux/version.h> | ||
#include <linux/unistd.h> | ||
#include <linux/kprobes.h> | ||
#include <linux/sched.h> | ||
#include <linux/uaccess.h> | ||
#include <linux/slab.h> | ||
#include <linux/ftrace.h> | ||
|
||
MODULE_LICENSE("GPL"); | ||
|
||
#define MAX_FILENAME_SIZE 200 | ||
|
||
#undef pr_fmt | ||
#define pr_fmt(fmt) "[syscall-ftrace] " fmt | ||
|
||
/** UID we want to spy on - will be filled from the command line. */ | ||
static int uid = 0; | ||
module_param(uid, int, 0644); | ||
|
||
/** | ||
* This is a housekeeping structure that saves all information | ||
* needed for hooking. Usage with `PREPARE_HOOK` is recommended. | ||
* | ||
* Example: | ||
* static ftrace_hook_t sys_clone_hook = | ||
* PREPARE_HOOK(__NR_clone, my_sys_clone, &orig_sys_clone) | ||
*/ | ||
typedef struct ftrace_hook { | ||
unsigned long nr; // syscall number from unistd.h | ||
void *new; // hook function | ||
void *orig; // original function | ||
|
||
unsigned long address; // address to the original function | ||
struct ftrace_ops ops; // ftrace structure | ||
} ftrace_hook_t; | ||
|
||
/* Static initializer for the user-supplied members of ftrace_hook_t. */
#define PREPARE_HOOK(_nr, _hook, _orig) \
    {                                   \
        .nr = (_nr),                    \
        .new = (_hook),                 \
        .orig = (_orig),                \
    }
|
||
static unsigned long **sys_call_table; | ||
|
||
/** | ||
* For the sake of simplicity, only the kprobe method is included. | ||
* If you want to know more about different methods to get | ||
* kallsyms_lookup_name, see syscall.c. | ||
*/ | ||
static int resolve_address(ftrace_hook_t *hook) | ||
{ | ||
static struct kprobe kp = { .symbol_name = "kallsyms_lookup_name" }; | ||
unsigned long (*kallsyms_lookup_name)(const char *name); | ||
|
||
register_kprobe(&kp); | ||
kallsyms_lookup_name = (unsigned long (*)(const char *))kp.addr; | ||
unregister_kprobe(&kp); | ||
|
||
if (!kallsyms_lookup_name) { | ||
pr_err("kallsyms_lookup_name is not found!\n"); | ||
return -1; | ||
} | ||
pr_info("kallsyms_lookup_name is found at 0x%lx\n", | ||
(unsigned long)kallsyms_lookup_name); | ||
|
||
sys_call_table = (unsigned long **)kallsyms_lookup_name("sys_call_table"); | ||
if (!sys_call_table) { | ||
pr_err("sys_call_table is not found!\n"); | ||
return -1; | ||
} | ||
pr_info("sys_call_table is found at 0x%lx\n", | ||
(unsigned long)sys_call_table); | ||
|
||
hook->address = (unsigned long)sys_call_table[hook->nr]; | ||
*((unsigned long *)hook->orig) = hook->address; | ||
return 0; | ||
} | ||
|
||
/** | ||
* This is where the magic happens. | ||
* | ||
* We check whether this function is called by the kernel or this module | ||
* by checking whether parent_ip is within this module. | ||
* | ||
* During the first call, parent_ip points to somewhere in the kernel | ||
* that's not in this module, | ||
* while the second call is in this module | ||
* since it's called from our_sys_openat. | ||
* | ||
* If it is the first call, we modify ip to be our_sys_openat, | ||
* which will pass control to it after ftrace is done. | ||
*/ | ||
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 11, 0) | ||
static void notrace ftrace_thunk(unsigned long ip, unsigned long parent_ip, | ||
struct ftrace_ops *ops, | ||
struct ftrace_regs *fregs) | ||
{ | ||
ftrace_hook_t *hook = container_of(ops, ftrace_hook_t, ops); | ||
|
||
if (!within_module(parent_ip, THIS_MODULE)) | ||
fregs->regs.ip = (unsigned long)hook->new; | ||
} | ||
|
||
#else /** Version < v5.11 */ | ||
linD026 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
static void notrace ftrace_thunk(unsigned long ip, unsigned long parent_ip, | ||
struct ftrace_ops *ops, struct pt_regs *regs) | ||
{ | ||
ftrace_hook_t *hook = container_of(ops, ftrace_hook_t, ops); | ||
|
||
if (!within_module(parent_ip, THIS_MODULE)) | ||
regs->ip = (unsigned long)hook->new; | ||
} | ||
|
||
#endif /** Version >= v5.11 */ | ||
|
||
/**
 * install_hook - resolve the target of @hook and start tracing it.
 * @hook: hook descriptor prepared with PREPARE_HOOK().
 *
 * Return: 0 on success, a negative errno otherwise. On failure nothing is
 * left registered with ftrace.
 */
static int install_hook(ftrace_hook_t *hook)
{
    int err;

    err = resolve_address(hook);
    if (err)
        return err;

    /** The callback function */
    hook->ops.func = ftrace_thunk;
    /** We need registers and we're modifying ip */
    hook->ops.flags = FTRACE_OPS_FL_SAVE_REGS | FTRACE_OPS_FL_IPMODIFY;
    /** Only the hooked function should be traced */
    err = ftrace_set_filter_ip(&hook->ops, hook->address, 0, 0);
    if (err) {
        pr_err("ftrace_set_filter_ip() failed: %d\n", err);
        return err;
    }

    err = register_ftrace_function(&hook->ops);
    if (err) {
        pr_err("register_ftrace_function() failed: %d\n", err);
        /** Do not leave a stale filter entry behind on failure */
        ftrace_set_filter_ip(&hook->ops, hook->address, 1, 0);
        return err;
    }

    return 0;
}
|
||
/**
 * remove_hook - stop tracing the function described by @hook.
 * @hook: a hook that was set up by install_hook().
 *
 * Failures are only logged; both teardown steps are always attempted.
 */
static void remove_hook(ftrace_hook_t *hook)
{
    int ret = unregister_ftrace_function(&hook->ops);

    if (ret != 0)
        pr_err("unregister_ftrace_function() failed: %d\n", ret);

    /** Passing remove = 1 drops the address from the filter */
    ret = ftrace_set_filter_ip(&hook->ops, hook->address, 1, 0);
    if (ret != 0)
        pr_err("ftrace_set_filter_ip() failed: %d\n", ret);
}
|
||
/** For some reason the kernel segfaults when the parameters are expanded. */ | ||
static asmlinkage long (*original_call)(struct pt_regs *regs); | ||
static asmlinkage long our_sys_openat(struct pt_regs *regs) | ||
{ | ||
char *kfilename; | ||
int errcode = 0; | ||
|
||
if (current->cred->uid.val != uid) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Still have the warning.
|
||
return original_call(regs); | ||
kfilename = kmalloc(MAX_FILENAME_SIZE * sizeof(char), GFP_KERNEL); | ||
if (!kfilename) | ||
return original_call(regs); | ||
|
||
/** | ||
* This may only work in x86_64 because getting parameters | ||
* from CPU registers is architecture-dependent. | ||
* | ||
* Change regs->si to appropriate registers | ||
* if you are trying on different architecture. | ||
*/ | ||
errcode = | ||
copy_from_user(kfilename, (char __user *)regs->si, MAX_FILENAME_SIZE); | ||
if (errcode < 0) { | ||
kfree(kfilename); | ||
return original_call(regs); | ||
} | ||
|
||
pr_info("File opened by UID %d: %s\n", uid, kfilename); | ||
kfree(kfilename); | ||
|
||
return original_call(regs); | ||
} | ||
|
||
static ftrace_hook_t sys_openat_hook = | ||
PREPARE_HOOK(__NR_openat, our_sys_openat, &original_call); | ||
|
||
/**
 * syscall_ftrace_start - module entry point: install the openat hook.
 *
 * Return: 0 on success, otherwise the error reported by install_hook().
 */
static int __init syscall_ftrace_start(void)
{
    int ret = install_hook(&sys_openat_hook);

    if (ret == 0)
        pr_info("hooked, spying on UID %d\n", uid);
    return ret;
}
|
||
/**
 * syscall_ftrace_end - module exit point: remove the openat hook.
 */
static void __exit syscall_ftrace_end(void)
{
    ftrace_hook_t *const hook = &sys_openat_hook;

    remove_hook(hook);
    pr_info("removed\n");
}
|
||
module_init(syscall_ftrace_start); | ||
module_exit(syscall_ftrace_end); |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1554,6 +1554,102 @@ \section{System Calls} | |
|
||
\samplec{examples/syscall.c} | ||
|
||
Another technique we can utilize to control the flow of execution of a syscall is \verb|ftrace|. | ||
It is an internal tracer designed to help out developers and designers of systems to find what is going on inside the kernel. | ||
It can be used for debugging or analyzing latencies and performance issues that take place outside of user-space. | ||
It is usually used as an event tracer by attaching callbacks to the beginning of functions in order to record and trace the flow of the kernel. | ||
|
||
\begin{code} | ||
struct ftrace_ops { | ||
ftrace_func_t func; // callback function | ||
unsigned long flags; // ftrace flags | ||
void* private; // any private data | ||
}; | ||
void callback_func(unsigned long ip, unsigned long parent_ip, | ||
struct ftrace_ops *ops, struct pt_regs *regs); | ||
\end{code} | ||
|
||
Above is the basic structure of the callback function, where the parameters are | ||
|
||
\begin{itemize} | ||
\item \cpp|ip|: The instruction pointer of the function being traced. | ||
\item \cpp|parent_ip|: The instruction pointer of the caller of the traced function. | ||
\item \cpp|ops|: A pointer to \cpp|ftrace_ops| that was used to register the callback. | ||
\item \cpp|regs|: If \cpp|FTRACE_OPS_FL_SAVE_REGS| or \cpp|FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED| are set in the \cpp|ftrace_ops| structure, | ||
then this will point to the \cpp|pt_regs| structure, just as it would if a breakpoint were placed at the start of the function that \verb|ftrace| is tracing, which gives access to the CPU registers. | ||
Otherwise it either contains garbage, or \cpp|NULL|. | ||
Note that in kernel v5.11 and later, this parameter is replaced with \cpp|struct ftrace_regs *fregs|, with the original \cpp|pt_regs| accessible through \cpp|fregs->regs|. | ||
\end{itemize} | ||
|
||
Internally, there's a 5-byte \cpp|call| to \cpp|__fentry__| at the beginning (BEFORE function prologue) of a traceable kernel function, which is converted to \cpp|nop| during boot to prevent overhead. When a trace is registered, it is changed back to \cpp|__fentry__| and the registered callback will be executed accordingly. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Separate the line with sentences. |
||
|
||
But callbacks can do more. | ||
Since it's called at the start of a function, | ||
and we have access to CPU registers, | ||
maybe we can ``hijack'' the traced function by modifying the instruction pointer? | ||
Yes, this is possible by enabling \cpp|FTRACE_OPS_FL_IPMODIFY| flag when registering a trace. | ||
It allows us to modify the instruction pointer register; once the \verb|ftrace| callback returns, execution resumes at the new address, which effectively acts as an unconditional jump. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The clause introduced by |
||
Note that while there can be multiple tracers on one function, only one tracer that changes \cpp|ip| can be registered at the same time. | ||
|
||
Figure~\ref{img:ftrace-hooking-example} gives an example of auditing \cpp|sys_execve| by hooking it using \verb|ftrace|. | ||
The callback function (\cpp|fh_ftrace_thunk|) checks whether the call is from the kernel or the module, | ||
and passes control accordingly. | ||
If the call is from the kernel, our auditing function is called. | ||
Otherwise, nothing happens. | ||
The check is important because we're only ``decorating'' the original syscall. | ||
Our auditing function contains a call to the original \cpp|sys_execve|, | ||
which will trigger the callback function again. | ||
It'll be an infinite loop if there's no check performed. | ||
|
||
\begin{figure}[h] | ||
\centering | ||
\includegraphics[width=\textwidth]{assets/syscall/flow.jpg} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Utilize TikZ for drawing. See https://texample.net/tikz/examples/pgf-umlsd/ There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. TikZ examples: https://texample.net/tikz/examples/ There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also, I have to rotate the figure by 90 degrees to minimize the overflow There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Alternatively, you can just render the partial sequences. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think I can rework this diagram in latex because it lacks features I need, like returning to functions other than caller (this is the most important one), annotation. (I'm not the creator of this sequence diagram so obtaining the "original" file is not possible either.) I think the resolution of that jpeg is good enough for even printing, so I think I may left it as is. The latex code I've written and the result\begin{sequencediagram}
\newthread{do_syscall_64}{do\_syscall\_64}
\newinst[1.5]{sys_execve}{sys\_execve}
\newinst[1.5]{ftrace}{[ftrace]}
\newinst[1]{fh_ftrace_thunk}{fh\_ftrace\_thunk}
\newinst[1]{fh_sys_execve}{fh\_sys\_execve}
\postlevel \postlevel \postlevel
\begin{call}{do_syscall_64}{\shortstack{
\cpp|regs-ax=|\\
\cpp|sys_call_table[nr]|\\
\cpp|(regs->di,regs->si|\\
\cpp|regs->dx,regs->r10|\\
\cpp|regs->r8,regs->r9)|
}}{sys_execve}{}
\begin{call}{sys_execve}{call \cpp|__fentry__|}{ftrace}{}
\begin{call}{ftrace}{}{fh_ftrace_thunk}{}
\postlevel
\end{call}
\end{call}
\begin{call}{sys_execve}{hooking}{fh_sys_execve}{\cpp|real_sys_execve()|}
\postlevel
\end{call}
\postlevel
\begin{call}{sys_execve}{call \cpp|__fentry__|}{ftrace}{}
\begin{call}{ftrace}{}{fh_ftrace_thunk}{}
\postlevel
\end{call}
\end{call}
\begin{call}{sys_execve}{}{fh_sys_execve}{}
\end{call}
\end{call}
\end{sequencediagram} |
||
\caption{Linux kernel hooking with ftrace \href{https://www.apriorit.com/dev-blog/546-hooking-linux-functions-2}{Source}} | ||
\label{img:ftrace-hooking-example} | ||
\end{figure} | ||
|
||
In fact, this is the mechanism that live kernel patching uses. | ||
By redirecting the flow of execution, | ||
end users can use patched functions instead of vulnerable ones without reboot, as figure~\ref{img:kernel-livepatching} shows. | ||
|
||
\begin{figure}[h] | ||
\centering | ||
\includegraphics[width=\textwidth]{assets/syscall/kernel-livepatching1.png}\\ | ||
\vspace{1cm} | ||
\includegraphics[width=\textwidth]{assets/syscall/kernel-livepatching2.png} | ||
\caption{How live kernel patching works. \href{https://ubuntu.com/blog/an-overview-of-live-kernel-patching}{Source}} | ||
\label{img:kernel-livepatching} | ||
\end{figure} | ||
|
||
For more information regarding \verb|ftrace|, check out \href{https://www.kernel.org/doc/html/latest/trace/ftrace.html}{the kernel documentation} and \href{https://youtu.be/93uE_kWWQjs}{this talk from Steven Rostedt}. | ||
|
||
Before getting our hands dirty, here are some functions we need to know. | ||
|
||
\begin{itemize} | ||
\item \cpp|register_ftrace_function(struct ftrace_ops *ops)|: Enable tracing call defined by \cpp|ops| | ||
\item \cpp|unregister_ftrace_function(struct ftrace_ops *ops)|: Disable tracing call defined by \cpp|ops| | ||
\item \cpp|ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, int len, int reset)|: Denote which function should be enabled for tracing by its name. If \cpp|buf| is \cpp|NULL|, all functions will be enabled. | ||
\item \cpp|ftrace_set_filter_ip(struct ftrace_ops *ops, unsigned long ip, int remove, int reset)|: Denote which function should be enabled for tracing by its address. \cpp|remove| should be \cpp|0| when adding a trace, and \cpp|1| when removing one. Note that \cpp|ip| must be the address where the call to \cpp|__fentry__| is located in the function. | ||
\end{itemize} | ||
|
||
Alright let's write some code. | ||
Below is the source code of the example from above, but rewritten using \verb|ftrace|. | ||
The main difference is the \cpp|install_hook| function, | ||
which prepares our tracee function (\cpp|sys_openat|), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Don't separate the line with comma. |
||
and the callback function (\cpp|ftrace_thunk|). | ||
We need both \cpp|FTRACE_OPS_FL_SAVE_REGS| and \cpp|FTRACE_OPS_FL_IPMODIFY| because we're modifying \cpp|ip|. | ||
Inside \cpp|ftrace_thunk| is where the magic happens. | ||
We check if it is called from within the module, | ||
if not then it modifies the instruction pointer to our ``spying'' function. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ditto. |
||
The check is performed by checking whether \cpp|parent_ip| is within this module. | ||
During the first call, \cpp|parent_ip| points to somewhere within the kernel, | ||
while during the second call it points to somewhere in our ``spying'' function, which is within the module. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ditto, the comma. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I didn't point out all of the cases. Please check again. |
||
|
||
When inserting the module, you should provide the uid you want to spy on as a parameter. | ||
For example, you can spy on yourself by \verb|sudo insmod syscall-ftrace.ko uid=$UID|. | ||
|
||
\samplec{examples/syscall-ftrace.c} | ||
jserv marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
\section{Blocking Processes and threads} | ||
\label{sec:blocking_process_thread} | ||
\subsection{Sleep} | ||
|
Uh oh!
There was an error while loading. Please reload this page.