@@ -5881,8 +5881,7 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
5881
5881
* @child: pointer to task_struct of forking parent process.
5882
5882
*
5883
5883
* A task is associated with the init_css_set until cgroup_post_fork()
5884
- * attaches it to the parent's css_set. Empty cg_list indicates that
5885
- * @child isn't holding reference to its css_set.
5884
+ * attaches it to the target css_set.
5886
5885
*/
5887
5886
void cgroup_fork (struct task_struct * child )
5888
5887
{
@@ -5908,24 +5907,154 @@ static struct cgroup *cgroup_get_from_file(struct file *f)
5908
5907
return cgrp ;
5909
5908
}
5910
5909
5910
+ /**
5911
+ * cgroup_css_set_fork - find or create a css_set for a child process
5912
+ * @kargs: the arguments passed to create the child process
5913
+ *
5914
+ * This function finds or creates a new css_set which the child
5915
+ * process will be attached to in cgroup_post_fork(). By default,
5916
+ * the child process will be given the same css_set as its parent.
5917
+ *
5918
+ * If CLONE_INTO_CGROUP is specified this function will try to find an
5919
+ * existing css_set which includes the requested cgroup and if not create
5920
+ * a new css_set that the child will be attached to later. If this function
5921
+ * succeeds it will hold cgroup_threadgroup_rwsem on return. If
5922
+ * CLONE_INTO_CGROUP is requested this function will grab cgroup mutex
5923
+ * before grabbing cgroup_threadgroup_rwsem and will hold a reference
5924
+ * to the target cgroup.
5925
+ */
5926
+ static int cgroup_css_set_fork (struct kernel_clone_args * kargs )
5927
+ __acquires (& cgroup_mutex ) __acquires (& cgroup_threadgroup_rwsem )
5928
+ {
5929
+ int ret ;
5930
+ struct cgroup * dst_cgrp = NULL ;
5931
+ struct css_set * cset ;
5932
+ struct super_block * sb ;
5933
+ struct file * f ;
5934
+
5935
+ if (kargs -> flags & CLONE_INTO_CGROUP )
5936
+ mutex_lock (& cgroup_mutex );
5937
+
5938
+ cgroup_threadgroup_change_begin (current );
5939
+
5940
+ spin_lock_irq (& css_set_lock );
5941
+ cset = task_css_set (current );
5942
+ get_css_set (cset );
5943
+ spin_unlock_irq (& css_set_lock );
5944
+
5945
+ if (!(kargs -> flags & CLONE_INTO_CGROUP )) {
5946
+ kargs -> cset = cset ;
5947
+ return 0 ;
5948
+ }
5949
+
5950
+ f = fget_raw (kargs -> cgroup );
5951
+ if (!f ) {
5952
+ ret = - EBADF ;
5953
+ goto err ;
5954
+ }
5955
+ sb = f -> f_path .dentry -> d_sb ;
5956
+
5957
+ dst_cgrp = cgroup_get_from_file (f );
5958
+ if (IS_ERR (dst_cgrp )) {
5959
+ ret = PTR_ERR (dst_cgrp );
5960
+ dst_cgrp = NULL ;
5961
+ goto err ;
5962
+ }
5963
+
5964
+ if (cgroup_is_dead (dst_cgrp )) {
5965
+ ret = - ENODEV ;
5966
+ goto err ;
5967
+ }
5968
+
5969
+ /*
5970
+ * Verify that the target cgroup is writable for us. This is
5971
+ * usually done by the vfs layer but since we're not going through
5972
+ * the vfs layer here we need to do it "manually".
5973
+ */
5974
+ ret = cgroup_may_write (dst_cgrp , sb );
5975
+ if (ret )
5976
+ goto err ;
5977
+
5978
+ ret = cgroup_attach_permissions (cset -> dfl_cgrp , dst_cgrp , sb ,
5979
+ !(kargs -> flags & CLONE_THREAD ));
5980
+ if (ret )
5981
+ goto err ;
5982
+
5983
+ kargs -> cset = find_css_set (cset , dst_cgrp );
5984
+ if (!kargs -> cset ) {
5985
+ ret = - ENOMEM ;
5986
+ goto err ;
5987
+ }
5988
+
5989
+ put_css_set (cset );
5990
+ fput (f );
5991
+ kargs -> cgrp = dst_cgrp ;
5992
+ return ret ;
5993
+
5994
+ err :
5995
+ cgroup_threadgroup_change_end (current );
5996
+ mutex_unlock (& cgroup_mutex );
5997
+ if (f )
5998
+ fput (f );
5999
+ if (dst_cgrp )
6000
+ cgroup_put (dst_cgrp );
6001
+ put_css_set (cset );
6002
+ if (kargs -> cset )
6003
+ put_css_set (kargs -> cset );
6004
+ return ret ;
6005
+ }
6006
+
6007
+ /**
6008
+ * cgroup_css_set_put_fork - drop references we took during fork
6009
+ * @kargs: the arguments passed to create the child process
6010
+ *
6011
+ * Drop references to the prepared css_set and target cgroup if
6012
+ * CLONE_INTO_CGROUP was requested.
6013
+ */
6014
+ static void cgroup_css_set_put_fork (struct kernel_clone_args * kargs )
6015
+ __releases (& cgroup_threadgroup_rwsem ) __releases (& cgroup_mutex )
6016
+ {
6017
+ cgroup_threadgroup_change_end (current );
6018
+
6019
+ if (kargs -> flags & CLONE_INTO_CGROUP ) {
6020
+ struct cgroup * cgrp = kargs -> cgrp ;
6021
+ struct css_set * cset = kargs -> cset ;
6022
+
6023
+ mutex_unlock (& cgroup_mutex );
6024
+
6025
+ if (cset ) {
6026
+ put_css_set (cset );
6027
+ kargs -> cset = NULL ;
6028
+ }
6029
+
6030
+ if (cgrp ) {
6031
+ cgroup_put (cgrp );
6032
+ kargs -> cgrp = NULL ;
6033
+ }
6034
+ }
6035
+ }
6036
+
5911
6037
/**
5912
6038
* cgroup_can_fork - called on a new task before the process is exposed
5913
6039
* @child: the child process
5914
6040
*
6041
+ * This prepares a new css_set for the child process which the child will
6042
+ * be attached to in cgroup_post_fork().
5915
6043
* This calls the subsystem can_fork() callbacks. If the cgroup_can_fork()
5916
6044
* callback returns an error, the fork aborts with that error code. This
5917
6045
* allows for a cgroup subsystem to conditionally allow or deny new forks.
5918
6046
*/
5919
- int cgroup_can_fork (struct task_struct * child )
5920
- __acquires (& cgroup_threadgroup_rwsem ) __releases (& cgroup_threadgroup_rwsem )
6047
+ int cgroup_can_fork (struct task_struct * child , struct kernel_clone_args * kargs )
5921
6048
{
5922
6049
struct cgroup_subsys * ss ;
5923
6050
int i , j , ret ;
5924
6051
5925
- cgroup_threadgroup_change_begin (current );
6052
+ ret = cgroup_css_set_fork (kargs );
6053
+ if (ret )
6054
+ return ret ;
5926
6055
5927
6056
do_each_subsys_mask (ss , i , have_canfork_callback ) {
5928
- ret = ss -> can_fork (child );
6057
+ ret = ss -> can_fork (child , kargs -> cset );
5929
6058
if (ret )
5930
6059
goto out_revert ;
5931
6060
} while_each_subsys_mask ();
@@ -5937,32 +6066,34 @@ int cgroup_can_fork(struct task_struct *child)
5937
6066
if (j >= i )
5938
6067
break ;
5939
6068
if (ss -> cancel_fork )
5940
- ss -> cancel_fork (child );
6069
+ ss -> cancel_fork (child , kargs -> cset );
5941
6070
}
5942
6071
5943
- cgroup_threadgroup_change_end ( current );
6072
+ cgroup_css_set_put_fork ( kargs );
5944
6073
5945
6074
return ret ;
5946
6075
}
5947
6076
5948
6077
/**
5949
- * cgroup_cancel_fork - called if a fork failed after cgroup_can_fork()
5950
- * @child: the child process
5951
- *
5952
- * This calls the cancel_fork() callbacks if a fork failed *after*
5953
- * cgroup_can_fork() succeded.
5954
- */
5955
- void cgroup_cancel_fork (struct task_struct * child )
5956
- __releases (& cgroup_threadgroup_rwsem )
6078
+ * cgroup_cancel_fork - called if a fork failed after cgroup_can_fork()
6079
+ * @child: the child process
6080
+ * @kargs: the arguments passed to create the child process
6081
+ *
6082
+ * This calls the cancel_fork() callbacks if a fork failed *after*
6083
+ * cgroup_can_fork() succeeded and cleans up references we took to
6084
+ * prepare a new css_set for the child process in cgroup_can_fork().
6085
+ */
6086
+ void cgroup_cancel_fork (struct task_struct * child ,
6087
+ struct kernel_clone_args * kargs )
5957
6088
{
5958
6089
struct cgroup_subsys * ss ;
5959
6090
int i ;
5960
6091
5961
6092
for_each_subsys (ss , i )
5962
6093
if (ss -> cancel_fork )
5963
- ss -> cancel_fork (child );
6094
+ ss -> cancel_fork (child , kargs -> cset );
5964
6095
5965
- cgroup_threadgroup_change_end ( current );
6096
+ cgroup_css_set_put_fork ( kargs );
5966
6097
}
5967
6098
5968
6099
/**
@@ -5972,22 +6103,27 @@ void cgroup_cancel_fork(struct task_struct *child)
5972
6103
* Attach the child process to its css_set calling the subsystem fork()
5973
6104
* callbacks.
5974
6105
*/
5975
- void cgroup_post_fork (struct task_struct * child )
5976
- __releases (& cgroup_threadgroup_rwsem )
6106
+ void cgroup_post_fork (struct task_struct * child ,
6107
+ struct kernel_clone_args * kargs )
6108
+ __releases (& cgroup_threadgroup_rwsem ) __releases (& cgroup_mutex )
5977
6109
{
5978
6110
struct cgroup_subsys * ss ;
5979
6111
struct css_set * cset ;
5980
6112
int i ;
5981
6113
6114
+ cset = kargs -> cset ;
6115
+ kargs -> cset = NULL ;
6116
+
5982
6117
spin_lock_irq (& css_set_lock );
5983
6118
5984
6119
/* init tasks are special, only link regular threads */
5985
6120
if (likely (child -> pid )) {
5986
6121
WARN_ON_ONCE (!list_empty (& child -> cg_list ));
5987
- cset = task_css_set (current ); /* current is @child's parent */
5988
- get_css_set (cset );
5989
6122
cset -> nr_tasks ++ ;
5990
6123
css_set_move_task (child , NULL , cset , false);
6124
+ } else {
6125
+ put_css_set (cset );
6126
+ cset = NULL ;
5991
6127
}
5992
6128
5993
6129
/*
@@ -6020,7 +6156,16 @@ void cgroup_post_fork(struct task_struct *child)
6020
6156
ss -> fork (child );
6021
6157
} while_each_subsys_mask ();
6022
6158
6023
- cgroup_threadgroup_change_end (current );
6159
+ /* Make the new cset the root_cset of the new cgroup namespace. */
6160
+ if (kargs -> flags & CLONE_NEWCGROUP ) {
6161
+ struct css_set * rcset = child -> nsproxy -> cgroup_ns -> root_cset ;
6162
+
6163
+ get_css_set (cset );
6164
+ child -> nsproxy -> cgroup_ns -> root_cset = cset ;
6165
+ put_css_set (rcset );
6166
+ }
6167
+
6168
+ cgroup_css_set_put_fork (kargs );
6024
6169
}
6025
6170
6026
6171
/**
0 commit comments