| [2557] | 1 | From efccdcdb63a7f7cc7cc1816f0d5e2524eb084c72 Mon Sep 17 00:00:00 2001 | 
|---|
|  | 2 | From: Thomas Gleixner <tglx@linutronix.de> | 
|---|
|  | 3 | Date: Tue, 3 Jun 2014 12:27:08 +0000 | 
|---|
|  | 4 | Subject: [PATCH 4/4] futex: Make lookup_pi_state more robust | 
|---|
|  | 5 |  | 
|---|
|  | 6 | commit 54a217887a7b658e2650c3feff22756ab80c7339 upstream. | 
|---|
|  | 7 |  | 
|---|
|  | 8 | The current implementation of lookup_pi_state has ambigous handling of | 
|---|
|  | 9 | the TID value 0 in the user space futex.  We can get into the kernel | 
|---|
|  | 10 | even if the TID value is 0, because either there is a stale waiters bit | 
|---|
|  | 11 | or the owner died bit is set or we are called from the requeue_pi path | 
|---|
|  | 12 | or from user space just for fun. | 
|---|
|  | 13 |  | 
|---|
|  | 14 | The current code avoids an explicit sanity check for pid = 0 in case | 
|---|
|  | 15 | that kernel internal state (waiters) are found for the user space | 
|---|
|  | 16 | address.  This can lead to state leakage and worse under some | 
|---|
|  | 17 | circumstances. | 
|---|
|  | 18 |  | 
|---|
|  | 19 | Handle the cases explicit: | 
|---|
|  | 20 |  | 
|---|
|  | 21 | Waiter | pi_state | pi->owner | uTID      | uODIED | ? | 
|---|
|  | 22 |  | 
|---|
|  | 23 | [1]  NULL   | ---      | ---       | 0         | 0/1    | Valid | 
|---|
|  | 24 | [2]  NULL   | ---      | ---       | >0        | 0/1    | Valid | 
|---|
|  | 25 |  | 
|---|
|  | 26 | [3]  Found  | NULL     | --        | Any       | 0/1    | Invalid | 
|---|
|  | 27 |  | 
|---|
|  | 28 | [4]  Found  | Found    | NULL      | 0         | 1      | Valid | 
|---|
|  | 29 | [5]  Found  | Found    | NULL      | >0        | 1      | Invalid | 
|---|
|  | 30 |  | 
|---|
|  | 31 | [6]  Found  | Found    | task      | 0         | 1      | Valid | 
|---|
|  | 32 |  | 
|---|
|  | 33 | [7]  Found  | Found    | NULL      | Any       | 0      | Invalid | 
|---|
|  | 34 |  | 
|---|
|  | 35 | [8]  Found  | Found    | task      | ==taskTID | 0/1    | Valid | 
|---|
|  | 36 | [9]  Found  | Found    | task      | 0         | 0      | Invalid | 
|---|
|  | 37 | [10] Found  | Found    | task      | !=taskTID | 0/1    | Invalid | 
|---|
|  | 38 |  | 
|---|
|  | 39 | [1] Indicates that the kernel can acquire the futex atomically. We | 
|---|
|  | 40 | came came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit. | 
|---|
|  | 41 |  | 
|---|
|  | 42 | [2] Valid, if TID does not belong to a kernel thread. If no matching | 
|---|
|  | 43 | thread is found then it indicates that the owner TID has died. | 
|---|
|  | 44 |  | 
|---|
|  | 45 | [3] Invalid. The waiter is queued on a non PI futex | 
|---|
|  | 46 |  | 
|---|
|  | 47 | [4] Valid state after exit_robust_list(), which sets the user space | 
|---|
|  | 48 | value to FUTEX_WAITERS | FUTEX_OWNER_DIED. | 
|---|
|  | 49 |  | 
|---|
|  | 50 | [5] The user space value got manipulated between exit_robust_list() | 
|---|
|  | 51 | and exit_pi_state_list() | 
|---|
|  | 52 |  | 
|---|
|  | 53 | [6] Valid state after exit_pi_state_list() which sets the new owner in | 
|---|
|  | 54 | the pi_state but cannot access the user space value. | 
|---|
|  | 55 |  | 
|---|
|  | 56 | [7] pi_state->owner can only be NULL when the OWNER_DIED bit is set. | 
|---|
|  | 57 |  | 
|---|
|  | 58 | [8] Owner and user space value match | 
|---|
|  | 59 |  | 
|---|
|  | 60 | [9] There is no transient state which sets the user space TID to 0 | 
|---|
|  | 61 | except exit_robust_list(), but this is indicated by the | 
|---|
|  | 62 | FUTEX_OWNER_DIED bit. See [4] | 
|---|
|  | 63 |  | 
|---|
|  | 64 | [10] There is no transient state which leaves owner and user space | 
|---|
|  | 65 | TID out of sync. | 
|---|
|  | 66 |  | 
|---|
|  | 67 | Signed-off-by: Thomas Gleixner <tglx@linutronix.de> | 
|---|
|  | 68 | Cc: Kees Cook <keescook@chromium.org> | 
|---|
|  | 69 | Cc: Will Drewry <wad@chromium.org> | 
|---|
|  | 70 | Cc: Darren Hart <dvhart@linux.intel.com> | 
|---|
|  | 71 | Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> | 
|---|
|  | 72 | Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 
|---|
|  | 73 | --- | 
|---|
|  | 74 | kernel/futex.c |  134 ++++++++++++++++++++++++++++++++++++++++++++------------ | 
|---|
|  | 75 | 1 file changed, 106 insertions(+), 28 deletions(-) | 
|---|
|  | 76 |  | 
|---|
|  | 77 | diff --git a/kernel/futex.c b/kernel/futex.c | 
|---|
|  | 78 | index 9720c42..625a4e6 100644 | 
|---|
|  | 79 | --- a/kernel/futex.c | 
|---|
|  | 80 | +++ b/kernel/futex.c | 
|---|
|  | 81 | @@ -592,10 +592,58 @@ void exit_pi_state_list(struct task_struct *curr) | 
|---|
|  | 82 | raw_spin_unlock_irq(&curr->pi_lock); | 
|---|
|  | 83 | } | 
|---|
|  | 84 |  | 
|---|
|  | 85 | +/* | 
|---|
|  | 86 | + * We need to check the following states: | 
|---|
|  | 87 | + * | 
|---|
|  | 88 | + *      Waiter | pi_state | pi->owner | uTID      | uODIED | ? | 
|---|
|  | 89 | + * | 
|---|
|  | 90 | + * [1]  NULL   | ---      | ---       | 0         | 0/1    | Valid | 
|---|
|  | 91 | + * [2]  NULL   | ---      | ---       | >0        | 0/1    | Valid | 
|---|
|  | 92 | + * | 
|---|
|  | 93 | + * [3]  Found  | NULL     | --        | Any       | 0/1    | Invalid | 
|---|
|  | 94 | + * | 
|---|
|  | 95 | + * [4]  Found  | Found    | NULL      | 0         | 1      | Valid | 
|---|
|  | 96 | + * [5]  Found  | Found    | NULL      | >0        | 1      | Invalid | 
|---|
|  | 97 | + * | 
|---|
|  | 98 | + * [6]  Found  | Found    | task      | 0         | 1      | Valid | 
|---|
|  | 99 | + * | 
|---|
|  | 100 | + * [7]  Found  | Found    | NULL      | Any       | 0      | Invalid | 
|---|
|  | 101 | + * | 
|---|
|  | 102 | + * [8]  Found  | Found    | task      | ==taskTID | 0/1    | Valid | 
|---|
|  | 103 | + * [9]  Found  | Found    | task      | 0         | 0      | Invalid | 
|---|
|  | 104 | + * [10] Found  | Found    | task      | !=taskTID | 0/1    | Invalid | 
|---|
|  | 105 | + * | 
|---|
|  | 106 | + * [1] Indicates that the kernel can acquire the futex atomically. We | 
|---|
|  | 107 | + *     came came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit. | 
|---|
|  | 108 | + * | 
|---|
|  | 109 | + * [2] Valid, if TID does not belong to a kernel thread. If no matching | 
|---|
|  | 110 | + *      thread is found then it indicates that the owner TID has died. | 
|---|
|  | 111 | + * | 
|---|
|  | 112 | + * [3] Invalid. The waiter is queued on a non PI futex | 
|---|
|  | 113 | + * | 
|---|
|  | 114 | + * [4] Valid state after exit_robust_list(), which sets the user space | 
|---|
|  | 115 | + *     value to FUTEX_WAITERS | FUTEX_OWNER_DIED. | 
|---|
|  | 116 | + * | 
|---|
|  | 117 | + * [5] The user space value got manipulated between exit_robust_list() | 
|---|
|  | 118 | + *     and exit_pi_state_list() | 
|---|
|  | 119 | + * | 
|---|
|  | 120 | + * [6] Valid state after exit_pi_state_list() which sets the new owner in | 
|---|
|  | 121 | + *     the pi_state but cannot access the user space value. | 
|---|
|  | 122 | + * | 
|---|
|  | 123 | + * [7] pi_state->owner can only be NULL when the OWNER_DIED bit is set. | 
|---|
|  | 124 | + * | 
|---|
|  | 125 | + * [8] Owner and user space value match | 
|---|
|  | 126 | + * | 
|---|
|  | 127 | + * [9] There is no transient state which sets the user space TID to 0 | 
|---|
|  | 128 | + *     except exit_robust_list(), but this is indicated by the | 
|---|
|  | 129 | + *     FUTEX_OWNER_DIED bit. See [4] | 
|---|
|  | 130 | + * | 
|---|
|  | 131 | + * [10] There is no transient state which leaves owner and user space | 
|---|
|  | 132 | + *     TID out of sync. | 
|---|
|  | 133 | + */ | 
|---|
|  | 134 | static int | 
|---|
|  | 135 | lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, | 
|---|
|  | 136 | -               union futex_key *key, struct futex_pi_state **ps, | 
|---|
|  | 137 | -               struct task_struct *task) | 
|---|
|  | 138 | +               union futex_key *key, struct futex_pi_state **ps) | 
|---|
|  | 139 | { | 
|---|
|  | 140 | struct futex_pi_state *pi_state = NULL; | 
|---|
|  | 141 | struct futex_q *this, *next; | 
|---|
|  | 142 | @@ -608,12 +656,13 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, | 
|---|
|  | 143 | plist_for_each_entry_safe(this, next, head, list) { | 
|---|
|  | 144 | if (match_futex(&this->key, key)) { | 
|---|
|  | 145 | /* | 
|---|
|  | 146 | -                        * Another waiter already exists - bump up | 
|---|
|  | 147 | -                        * the refcount and return its pi_state: | 
|---|
|  | 148 | +                        * Sanity check the waiter before increasing | 
|---|
|  | 149 | +                        * the refcount and attaching to it. | 
|---|
|  | 150 | */ | 
|---|
|  | 151 | pi_state = this->pi_state; | 
|---|
|  | 152 | /* | 
|---|
|  | 153 | -                        * Userspace might have messed up non-PI and PI futexes | 
|---|
|  | 154 | +                        * Userspace might have messed up non-PI and | 
|---|
|  | 155 | +                        * PI futexes [3] | 
|---|
|  | 156 | */ | 
|---|
|  | 157 | if (unlikely(!pi_state)) | 
|---|
|  | 158 | return -EINVAL; | 
|---|
|  | 159 | @@ -621,44 +670,70 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, | 
|---|
|  | 160 | WARN_ON(!atomic_read(&pi_state->refcount)); | 
|---|
|  | 161 |  | 
|---|
|  | 162 | /* | 
|---|
|  | 163 | -                        * When pi_state->owner is NULL then the owner died | 
|---|
|  | 164 | -                        * and another waiter is on the fly. pi_state->owner | 
|---|
|  | 165 | -                        * is fixed up by the task which acquires | 
|---|
|  | 166 | -                        * pi_state->rt_mutex. | 
|---|
|  | 167 | -                        * | 
|---|
|  | 168 | -                        * We do not check for pid == 0 which can happen when | 
|---|
|  | 169 | -                        * the owner died and robust_list_exit() cleared the | 
|---|
|  | 170 | -                        * TID. | 
|---|
|  | 171 | +                        * Handle the owner died case: | 
|---|
|  | 172 | */ | 
|---|
|  | 173 | -                       if (pid && pi_state->owner) { | 
|---|
|  | 174 | +                       if (uval & FUTEX_OWNER_DIED) { | 
|---|
|  | 175 | /* | 
|---|
|  | 176 | -                                * Bail out if user space manipulated the | 
|---|
|  | 177 | -                                * futex value. | 
|---|
|  | 178 | +                                * exit_pi_state_list sets owner to NULL and | 
|---|
|  | 179 | +                                * wakes the topmost waiter. The task which | 
|---|
|  | 180 | +                                * acquires the pi_state->rt_mutex will fixup | 
|---|
|  | 181 | +                                * owner. | 
|---|
|  | 182 | */ | 
|---|
|  | 183 | -                               if (pid != task_pid_vnr(pi_state->owner)) | 
|---|
|  | 184 | +                               if (!pi_state->owner) { | 
|---|
|  | 185 | +                                       /* | 
|---|
|  | 186 | +                                        * No pi state owner, but the user | 
|---|
|  | 187 | +                                        * space TID is not 0. Inconsistent | 
|---|
|  | 188 | +                                        * state. [5] | 
|---|
|  | 189 | +                                        */ | 
|---|
|  | 190 | +                                       if (pid) | 
|---|
|  | 191 | +                                               return -EINVAL; | 
|---|
|  | 192 | +                                       /* | 
|---|
|  | 193 | +                                        * Take a ref on the state and | 
|---|
|  | 194 | +                                        * return. [4] | 
|---|
|  | 195 | +                                        */ | 
|---|
|  | 196 | +                                       goto out_state; | 
|---|
|  | 197 | +                               } | 
|---|
|  | 198 | + | 
|---|
|  | 199 | +                               /* | 
|---|
|  | 200 | +                                * If TID is 0, then either the dying owner | 
|---|
|  | 201 | +                                * has not yet executed exit_pi_state_list() | 
|---|
|  | 202 | +                                * or some waiter acquired the rtmutex in the | 
|---|
|  | 203 | +                                * pi state, but did not yet fixup the TID in | 
|---|
|  | 204 | +                                * user space. | 
|---|
|  | 205 | +                                * | 
|---|
|  | 206 | +                                * Take a ref on the state and return. [6] | 
|---|
|  | 207 | +                                */ | 
|---|
|  | 208 | +                               if (!pid) | 
|---|
|  | 209 | +                                       goto out_state; | 
|---|
|  | 210 | +                       } else { | 
|---|
|  | 211 | +                               /* | 
|---|
|  | 212 | +                                * If the owner died bit is not set, | 
|---|
|  | 213 | +                                * then the pi_state must have an | 
|---|
|  | 214 | +                                * owner. [7] | 
|---|
|  | 215 | +                                */ | 
|---|
|  | 216 | +                               if (!pi_state->owner) | 
|---|
|  | 217 | return -EINVAL; | 
|---|
|  | 218 | } | 
|---|
|  | 219 |  | 
|---|
|  | 220 | /* | 
|---|
|  | 221 | -                        * Protect against a corrupted uval. If uval | 
|---|
|  | 222 | -                        * is 0x80000000 then pid is 0 and the waiter | 
|---|
|  | 223 | -                        * bit is set. So the deadlock check in the | 
|---|
|  | 224 | -                        * calling code has failed and we did not fall | 
|---|
|  | 225 | -                        * into the check above due to !pid. | 
|---|
|  | 226 | +                        * Bail out if user space manipulated the | 
|---|
|  | 227 | +                        * futex value. If pi state exists then the | 
|---|
|  | 228 | +                        * owner TID must be the same as the user | 
|---|
|  | 229 | +                        * space TID. [9/10] | 
|---|
|  | 230 | */ | 
|---|
|  | 231 | -                       if (task && pi_state->owner == task) | 
|---|
|  | 232 | -                               return -EDEADLK; | 
|---|
|  | 233 | +                       if (pid != task_pid_vnr(pi_state->owner)) | 
|---|
|  | 234 | +                               return -EINVAL; | 
|---|
|  | 235 |  | 
|---|
|  | 236 | +               out_state: | 
|---|
|  | 237 | atomic_inc(&pi_state->refcount); | 
|---|
|  | 238 | *ps = pi_state; | 
|---|
|  | 239 | - | 
|---|
|  | 240 | return 0; | 
|---|
|  | 241 | } | 
|---|
|  | 242 | } | 
|---|
|  | 243 |  | 
|---|
|  | 244 | /* | 
|---|
|  | 245 | * We are the first waiter - try to look up the real owner and attach | 
|---|
|  | 246 | -        * the new pi_state to it, but bail out when TID = 0 | 
|---|
|  | 247 | +        * the new pi_state to it, but bail out when TID = 0 [1] | 
|---|
|  | 248 | */ | 
|---|
|  | 249 | if (!pid) | 
|---|
|  | 250 | return -ESRCH; | 
|---|
|  | 251 | @@ -691,6 +766,9 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, | 
|---|
|  | 252 | return ret; | 
|---|
|  | 253 | } | 
|---|
|  | 254 |  | 
|---|
|  | 255 | +       /* | 
|---|
|  | 256 | +        * No existing pi state. First waiter. [2] | 
|---|
|  | 257 | +        */ | 
|---|
|  | 258 | pi_state = alloc_pi_state(); | 
|---|
|  | 259 |  | 
|---|
|  | 260 | /* | 
|---|
|  | 261 | @@ -811,7 +889,7 @@ retry: | 
|---|
|  | 262 | * We dont have the lock. Look up the PI state (or create it if | 
|---|
|  | 263 | * we are the first waiter): | 
|---|
|  | 264 | */ | 
|---|
|  | 265 | -       ret = lookup_pi_state(uval, hb, key, ps, task); | 
|---|
|  | 266 | +       ret = lookup_pi_state(uval, hb, key, ps); | 
|---|
|  | 267 |  | 
|---|
|  | 268 | if (unlikely(ret)) { | 
|---|
|  | 269 | switch (ret) { | 
|---|
|  | 270 | @@ -1414,7 +1492,7 @@ retry_private: | 
|---|
|  | 271 | * rereading and handing potential crap to | 
|---|
|  | 272 | * lookup_pi_state. | 
|---|
|  | 273 | */ | 
|---|
|  | 274 | -                       ret = lookup_pi_state(ret, hb2, &key2, &pi_state, NULL); | 
|---|
|  | 275 | +                       ret = lookup_pi_state(ret, hb2, &key2, &pi_state); | 
|---|
|  | 276 | } | 
|---|
|  | 277 |  | 
|---|
|  | 278 | switch (ret) { | 
|---|
|  | 279 | -- | 
|---|
|  | 280 | 1.7.10.4 | 
|---|
|  | 281 |  | 
|---|