// SPDX-License-Identifier: GPL-2.0

#include <linux/irq-entry-common.h>
#include <linux/resume_user_mode.h>
#include <linux/highmem.h>
#include <linux/jump_label.h>
#include <linux/kmsan.h>
#include <linux/livepatch.h>
#include <linux/tick.h>

/* Workaround to allow gradual conversion of architecture code */
void __weak arch_do_signal_or_restart(struct pt_regs *regs) { }
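
/*
 * Illustrative sketch only: a converted architecture provides a strong
 * definition that delivers pending signals from its own signal code.
 * handle_arch_signal() is a placeholder for the arch's frame-setup helper;
 * syscall restart handling is architecture specific and elided here:
 *
 *	void arch_do_signal_or_restart(struct pt_regs *regs)
 *	{
 *		struct ksignal ksig;
 *
 *		if (get_signal(&ksig)) {
 *			handle_arch_signal(&ksig, regs);
 *			return;
 *		}
 *		restore_saved_sigmask();
 *	}
 */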

#ifdef CONFIG_HAVE_GENERIC_TIF_BITS
#define EXIT_TO_USER_MODE_WORK_LOOP	(EXIT_TO_USER_MODE_WORK & ~_TIF_RSEQ)
#else
#define EXIT_TO_USER_MODE_WORK_LOOP	(EXIT_TO_USER_MODE_WORK)
#endif
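
/*
 * With CONFIG_HAVE_GENERIC_TIF_BITS, _TIF_RSEQ is masked out of the loop
 * work above because rseq is not handled per iteration: it is processed
 * once by rseq_exit_to_user_mode_restart() in exit_to_user_mode_loop()
 * below, after all other work has completed.
 */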

static __always_inline unsigned long __exit_to_user_mode_loop(struct pt_regs *regs,
							       unsigned long ti_work)
{
	/*
	 * Before returning to user space, ensure that all pending work
	 * items have been completed.
	 */
	while (ti_work & EXIT_TO_USER_MODE_WORK_LOOP) {

		local_irq_enable_exit_to_user(ti_work);

		if (ti_work & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY))
			schedule();

		if (ti_work & _TIF_UPROBE)
			uprobe_notify_resume(regs);

		if (ti_work & _TIF_PATCH_PENDING)
			klp_update_patch_state(current);

		if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
			arch_do_signal_or_restart(regs);

		if (ti_work & _TIF_NOTIFY_RESUME)
			resume_user_mode_work(regs);

		/* Architecture specific TIF work */
		arch_exit_to_user_mode_work(regs, ti_work);

		/*
		 * Disable interrupts and reevaluate the work flags as they
		 * might have changed while interrupts and preemption were
		 * enabled above.
		 */
		local_irq_disable_exit_to_user();

		/* Check if any of the above work has queued a deferred wakeup */
		tick_nohz_user_enter_prepare();

		ti_work = read_thread_flags();
	}

	/* Return the latest work state for arch_exit_to_user_mode() */
	return ti_work;
}

/**
 * exit_to_user_mode_loop - do any pending work before leaving to user space
 * @regs:	Pointer to pt_regs on entry stack
 * @ti_work:	TIF work flags as read by the caller
 */
__always_inline unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
						     unsigned long ti_work)
{
	for (;;) {
		ti_work = __exit_to_user_mode_loop(regs, ti_work);

		if (likely(!rseq_exit_to_user_mode_restart(regs, ti_work)))
			return ti_work;
		ti_work = read_thread_flags();
	}
}
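
/*
 * Illustrative sketch of the caller side, assuming the usual shape of the
 * generic exit path: the thread flags are read with interrupts disabled
 * and the loop is only entered when work is pending, roughly:
 *
 *	unsigned long ti_work = read_thread_flags();
 *
 *	if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
 *		ti_work = exit_to_user_mode_loop(regs, ti_work);
 *
 *	arch_exit_to_user_mode_prepare(regs, ti_work);
 */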

noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
{
	irqentry_state_t ret = {
		.exit_rcu = false,
	};

	if (user_mode(regs)) {
		irqentry_enter_from_user_mode(regs);
		return ret;
	}

	/*
	 * If this entry hit the idle task, invoke ct_irq_enter() whether
	 * RCU is watching or not.
	 *
	 * Interrupts can nest when the first interrupt invokes softirq
	 * processing on return, which enables interrupts.
	 *
	 * Scheduler ticks in the idle task can mark a quiescent state and
	 * terminate a grace period, if and only if the timer interrupt is
	 * not nested into another interrupt.
	 *
	 * Checking for rcu_is_watching() here would prevent the nesting
	 * interrupt from invoking ct_irq_enter(). If that nested interrupt
	 * is the tick then rcu_flavor_sched_clock_irq() would incorrectly
	 * assume that it is the first interrupt and eventually claim a
	 * quiescent state and end grace periods prematurely.
	 *
	 * Unconditionally invoke ct_irq_enter() so RCU state stays
	 * consistent.
	 *
	 * TINY_RCU does not support EQS, so let the compiler eliminate
	 * this part when TINY_RCU is enabled.
	 */
	if (!IS_ENABLED(CONFIG_TINY_RCU) &&
	    (is_idle_task(current) || arch_in_rcu_eqs())) {
		/*
		 * If RCU is not watching then the same careful
		 * sequence vs. lockdep and tracing is required
		 * as in irqentry_enter_from_user_mode().
		 */
		lockdep_hardirqs_off(CALLER_ADDR0);
		ct_irq_enter();
		instrumentation_begin();
		kmsan_unpoison_entry_regs(regs);
		trace_hardirqs_off_finish();
		instrumentation_end();

		ret.exit_rcu = true;
		return ret;
	}

	/*
	 * If RCU is watching then RCU only wants to check whether it needs
	 * to restart the tick in NOHZ mode. rcu_irq_enter_check_tick()
	 * already contains a warning when RCU is not watching, so no point
	 * in having another one here.
	 */
	lockdep_hardirqs_off(CALLER_ADDR0);
	instrumentation_begin();
	kmsan_unpoison_entry_regs(regs);
	rcu_irq_enter_check_tick();
	trace_hardirqs_off_finish();
	instrumentation_end();

	return ret;
}
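
/*
 * Illustrative sketch of how an architecture pairs irqentry_enter() and
 * irqentry_exit() around its interrupt dispatch; arch_handle_irq() is a
 * placeholder name, with generic_handle_arch_irq() assumed as the
 * dispatch helper:
 *
 *	void arch_handle_irq(struct pt_regs *regs)
 *	{
 *		irqentry_state_t state = irqentry_enter(regs);
 *
 *		instrumentation_begin();
 *		generic_handle_arch_irq(regs);
 *		instrumentation_end();
 *
 *		irqentry_exit(regs, state);
 *	}
 */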

/**
 * arch_irqentry_exit_need_resched - Architecture specific need resched function
 *
 * Invoked from raw_irqentry_exit_cond_resched() to check if resched is needed.
 * The default implementation returns true.
 *
 * The main purpose is to permit an architecture to avoid preemption of a
 * task from an IRQ.
 */
static inline bool arch_irqentry_exit_need_resched(void);

#ifndef arch_irqentry_exit_need_resched
static inline bool arch_irqentry_exit_need_resched(void) { return true; }
#endif
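
/*
 * Illustrative sketch of an architecture override, assuming the usual
 * convention of defining the macro to replace the inline default; the
 * condition arch_irq_preemption_blocked() is hypothetical:
 *
 *	static inline bool arch_irqentry_exit_need_resched(void)
 *	{
 *		return !arch_irq_preemption_blocked();
 *	}
 *	#define arch_irqentry_exit_need_resched arch_irqentry_exit_need_resched
 */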

void raw_irqentry_exit_cond_resched(void)
{
	if (!preempt_count()) {
		/* Sanity check RCU and thread stack */
		rcu_irq_exit_check_preempt();
		if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
			WARN_ON_ONCE(!on_thread_stack());
		if (need_resched() && arch_irqentry_exit_need_resched())
			preempt_schedule_irq();
	}
}
#ifdef CONFIG_PREEMPT_DYNAMIC
#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
DEFINE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched);
#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
void dynamic_irqentry_exit_cond_resched(void)
{
	if (!static_branch_unlikely(&sk_dynamic_irqentry_exit_cond_resched))
		return;
	raw_irqentry_exit_cond_resched();
}
#endif
#endif
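
/*
 * Sketch of the call-site dispatch, assuming the declarations live in the
 * matching entry header: irqentry_exit_cond_resched() used below resolves
 * to one of the variants above, roughly:
 *
 *	#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
 *	#define irqentry_exit_cond_resched()	static_call(irqentry_exit_cond_resched)()
 *	#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
 *	#define irqentry_exit_cond_resched()	dynamic_irqentry_exit_cond_resched()
 *	#else
 *	#define irqentry_exit_cond_resched()	raw_irqentry_exit_cond_resched()
 *	#endif
 */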

noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state)
{
	lockdep_assert_irqs_disabled();

	/* Check whether this returns to user mode */
	if (user_mode(regs)) {
		irqentry_exit_to_user_mode(regs);
	} else if (!regs_irqs_disabled(regs)) {
		/*
		 * If RCU was not watching on entry this needs to be done
		 * carefully and needs the same ordering of lockdep/tracing
		 * and RCU as the return to user mode path.
		 */
		if (state.exit_rcu) {
			instrumentation_begin();
			/* Tell the tracer that IRET will enable interrupts */
			trace_hardirqs_on_prepare();
			lockdep_hardirqs_on_prepare();
			instrumentation_end();
			ct_irq_exit();
			lockdep_hardirqs_on(CALLER_ADDR0);
			return;
		}

		instrumentation_begin();
		if (IS_ENABLED(CONFIG_PREEMPTION))
			irqentry_exit_cond_resched();

		/* Covers both tracing and lockdep */
		trace_hardirqs_on();
		instrumentation_end();
	} else {
		/*
		 * IRQ flags state is correct already. Just tell RCU if it
		 * was not watching on entry.
		 */
		if (state.exit_rcu)
			ct_irq_exit();
	}
}

irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs)
{
	irqentry_state_t irq_state;

	irq_state.lockdep = lockdep_hardirqs_enabled();

	__nmi_enter();
	lockdep_hardirqs_off(CALLER_ADDR0);
	lockdep_hardirq_enter();
	ct_nmi_enter();

	instrumentation_begin();
	kmsan_unpoison_entry_regs(regs);
	trace_hardirqs_off_finish();
	ftrace_nmi_enter();
	instrumentation_end();

	return irq_state;
}

void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state)
{
	instrumentation_begin();
	ftrace_nmi_exit();
	if (irq_state.lockdep) {
		trace_hardirqs_on_prepare();
		lockdep_hardirqs_on_prepare();
	}
	instrumentation_end();

	ct_nmi_exit();
	lockdep_hardirq_exit();
	if (irq_state.lockdep)
		lockdep_hardirqs_on(CALLER_ADDR0);
	__nmi_exit();
}
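
/*
 * Illustrative sketch of the NMI pairing; arch_handle_nmi() is a
 * placeholder for the architecture's actual NMI dispatch:
 *
 *	void arch_nmi_handler(struct pt_regs *regs)
 *	{
 *		irqentry_state_t irq_state = irqentry_nmi_enter(regs);
 *
 *		instrumentation_begin();
 *		arch_handle_nmi(regs);
 *		instrumentation_end();
 *
 *		irqentry_nmi_exit(regs, irq_state);
 *	}
 */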