[IA64] fix siglock
[pandora-kernel.git] / arch / ia64 / kernel / fsys.S
1 /*
2  * This file contains the light-weight system call handlers (fsyscall-handlers).
3  *
4  * Copyright (C) 2003 Hewlett-Packard Co
5  *      David Mosberger-Tang <davidm@hpl.hp.com>
6  *
7  * 25-Sep-03 davidm     Implement fsys_rt_sigprocmask().
8  * 18-Feb-03 louisk     Implement fsys_gettimeofday().
9  * 28-Feb-03 davidm     Fixed several bugs in fsys_gettimeofday().  Tuned it some more,
10  *                      probably broke it along the way... ;-)
11  * 13-Jul-04 clameter   Implement fsys_clock_gettime and revise fsys_gettimeofday to make
12  *                      it capable of using memory based clocks without falling back to C code.
13  * 08-Feb-07 Fenghua Yu Implement fsys_getcpu.
14  *
15  */
16
17 #include <asm/asmmacro.h>
18 #include <asm/errno.h>
19 #include <asm/asm-offsets.h>
20 #include <asm/percpu.h>
21 #include <asm/thread_info.h>
22 #include <asm/sal.h>
23 #include <asm/signal.h>
24 #include <asm/system.h>
25 #include <asm/unistd.h>
26
27 #include "entry.h"
28 #include "paravirt_inst.h"
29
30 /*
31  * See Documentation/ia64/fsys.txt for details on fsyscalls.
32  *
33  * On entry to an fsyscall handler:
34  *   r10        = 0 (i.e., defaults to "successful syscall return")
35  *   r11        = saved ar.pfs (a user-level value)
36  *   r15        = system call number
37  *   r16        = "current" task pointer (in normal kernel-mode, this is in r13)
38  *   r32-r39    = system call arguments
39  *   b6         = return address (a user-level value)
40  *   ar.pfs     = previous frame-state (a user-level value)
41  *   PSR.be     = cleared to zero (i.e., little-endian byte order is in effect)
42  *   all other registers may contain values passed in from user-mode
43  *
44  * On return from an fsyscall handler:
45  *   r11        = saved ar.pfs (as passed into the fsyscall handler)
46  *   r15        = system call number (as passed into the fsyscall handler)
47  *   r32-r39    = system call arguments (as passed into the fsyscall handler)
48  *   b6         = return address (as passed into the fsyscall handler)
49  *   ar.pfs     = previous frame-state (as passed into the fsyscall handler)
50  */
51
52 ENTRY(fsys_ni_syscall)
53         .prologue
54         .altrp b6
55         .body
56         mov r10=-1                              // r10 = -1: override the "successful return" default of 0
57         mov r8=ENOSYS                           // r8 = ENOSYS: no light-weight handler implemented
58         FSYS_RETURN
59 END(fsys_ni_syscall)
60
61 ENTRY(fsys_getpid)
62         .prologue
63         .altrp b6
64         .body
           // Light-weight getpid.  Walks
           //   current->group_leader->pids[PIDTYPE_PID].pid->numbers[pid->level].nr
           // and returns that value in r8.  If any TIF_ALLWORK_MASK flag is set in the
           // thread flags, the result is discarded and we branch to
           // fsys_fallback_syscall instead.
65         add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
66         ;;
67         ld8 r17=[r17]                           // r17 = current->group_leader
68         add r9=TI_FLAGS+IA64_TASK_SIZE,r16      // r9 = address of the thread flags word
69         ;;
70         ld4 r9=[r9]                             // r9 = thread flags
71         add r17=IA64_TASK_TGIDLINK_OFFSET,r17
72         ;;
73         and r9=TIF_ALLWORK_MASK,r9              // r9 != 0 iff some work is pending
74         ld8 r17=[r17]                           // r17 = current->group_leader->pids[PIDTYPE_PID].pid
75         ;;
76         add r8=IA64_PID_LEVEL_OFFSET,r17
77         ;;
78         ld4 r8=[r8]                             // r8 = pid->level
79         add r17=IA64_PID_UPID_OFFSET,r17        // r17 = &pid->numbers[0]
80         ;;
81         shl r8=r8,IA64_UPID_SHIFT               // turn the level into a byte offset into numbers[]
82         ;;
83         add r17=r17,r8                          // r17 = &pid->numbers[pid->level]
84         ;;
85         ld4 r8=[r17]                            // r8 = pid->numbers[pid->level].nr
86         ;;
87         mov r17=0                               // don't hand a kernel pointer back to user level
88         ;;
89         cmp.ne p8,p0=0,r9                       // p8 = work pending
90 (p8)    br.spnt.many fsys_fallback_syscall
91         FSYS_RETURN
92 END(fsys_getpid)
93
94 ENTRY(fsys_getppid)
95         .prologue
96         .altrp b6
97         .body
           // Light-weight getppid.  Returns in r8 the tgid field of
           // current->group_leader->real_parent.  If any TIF_ALLWORK_MASK flag is
           // set in the thread flags we branch to fsys_fallback_syscall instead
           // (on both SMP and UP builds).  On SMP, real_parent is re-read after
           // the tgid load and the sequence is retried if it changed.
98         add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
99         ;;
100         ld8 r17=[r17]                           // r17 = current->group_leader
101         add r9=TI_FLAGS+IA64_TASK_SIZE,r16      // r9 = address of the thread flags word
102         ;;
103
104         ld4 r9=[r9]                             // r9 = thread flags
105         add r17=IA64_TASK_REAL_PARENT_OFFSET,r17 // r17 = &current->group_leader->real_parent
106         ;;
107         and r9=TIF_ALLWORK_MASK,r9              // r9 != 0 iff some work is pending
108
109 1:      ld8 r18=[r17]                           // r18 = current->group_leader->real_parent
110         ;;
111         cmp.ne p8,p0=0,r9                       // p8 = work pending
112         add r8=IA64_TASK_TGID_OFFSET,r18        // r8 = &current->group_leader->real_parent->tgid
113         ;;
114
115         /*
116          * The .acq is needed to ensure that the read of tgid has returned its data before
117          * we re-check "real_parent".
118          */
119         ld4.acq r8=[r8]                         // r8 = current->group_leader->real_parent->tgid
120 #ifdef CONFIG_SMP
121         /*
122          * Re-read current->group_leader->real_parent.
123          */
124         ld8 r19=[r17]                           // r19 = current->group_leader->real_parent
125 (p8)    br.spnt.many fsys_fallback_syscall
126         ;;
127         cmp.ne p6,p0=r18,r19                    // did real_parent change?
128         mov r19=0                       // i must not leak kernel bits...
129 (p6)    br.cond.spnt.few 1b                     // yes -> redo the read of tgid and the check
130         ;;
131         mov r17=0                       // i must not leak kernel bits...
132         mov r18=0                       // i must not leak kernel bits...
133 #else
            /*
             * The pending-work check must not depend on CONFIG_SMP: without this
             * branch p8 was computed above but never acted upon on UP kernels.
             */
    (p8)    br.spnt.many fsys_fallback_syscall
134         mov r17=0                       // i must not leak kernel bits...
135         mov r18=0                       // i must not leak kernel bits...
136         mov r19=0                       // i must not leak kernel bits...
137 #endif
138         FSYS_RETURN
139 END(fsys_getppid)
140
141 ENTRY(fsys_set_tid_address)
142         .prologue
143         .altrp b6
144         .body
            // Light-weight set_tid_address.  Stores the argument (r32) into the
            // task field at IA64_TASK_CLEAR_CHILD_TID_OFFSET and returns in r8
            // current->pids[PIDTYPE_PID].pid->numbers[pid->level].nr.  A NaT
            // argument is stored as -1.  If any TIF_ALLWORK_MASK flag is set we
            // branch to fsys_fallback_syscall (after the store has been issued).
145         add r9=TI_FLAGS+IA64_TASK_SIZE,r16      // r9 = address of the thread flags word
146         add r17=IA64_TASK_TGIDLINK_OFFSET,r16
147         ;;
148         ld4 r9=[r9]                             // r9 = thread flags
149         tnat.z p6,p7=r32                // check argument register for being NaT
150         ld8 r17=[r17]                           // r17 = current->pids[PIDTYPE_PID].pid
151         ;;
152         and r9=TIF_ALLWORK_MASK,r9              // r9 != 0 iff some work is pending
153         add r8=IA64_PID_LEVEL_OFFSET,r17
154         add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16    // r18 = address of the clear_child_tid field
155         ;;
156         ld4 r8=[r8]                             // r8 = pid->level
157         add r17=IA64_PID_UPID_OFFSET,r17        // r17 = &pid->numbers[0]
158         ;;
159         shl r8=r8,IA64_UPID_SHIFT               // turn the level into a byte offset into numbers[]
160         ;;
161         add r17=r17,r8                          // r17 = &pid->numbers[pid->level]
162         ;;
163         ld4 r8=[r17]                            // r8 = pid->numbers[pid->level].nr
164         ;;
165         cmp.ne p8,p0=0,r9                       // p8 = work pending
166         mov r17=-1                              // value stored when the argument is NaT
167         ;;
168 (p6)    st8 [r18]=r32                           // argument is a real value: store it
169 (p7)    st8 [r18]=r17                           // argument is NaT: store -1 instead
170 (p8)    br.spnt.many fsys_fallback_syscall
171         ;;
172         mov r17=0                       // i must not leak kernel bits...
173         mov r18=0                       // i must not leak kernel bits...
174         FSYS_RETURN
175 END(fsys_set_tid_address)
176
    /*
     * fsys_gettimeofday loads the sequence counter directly from the base
     * address of fsyscall_gtod_data and the jitter flag directly from the base
     * address of itc_jitter_data, so both fields must sit at offset 0.
     */
177 #if IA64_GTOD_LOCK_OFFSET !=0
178 #error fsys_gettimeofday incompatible with changes to struct fsyscall_gtod_data_t
179 #endif
180 #if IA64_ITC_JITTER_OFFSET !=0
181 #error fsys_gettimeofday incompatible with changes to struct itc_jitter_data_t
182 #endif
    /* Clock ids accepted by fsys_clock_gettime. */
183 #define CLOCK_REALTIME 0
184 #define CLOCK_MONOTONIC 1
    /*
     * Processing flags passed to .gettime in r30.  They are copied into the
     * predicate registers with "mov pr = r30,0xc000", so bit 14 becomes p14
     * (divide by 1000) and bit 15 becomes p15 (add monotonic).
     */
185 #define CLOCK_DIVIDE_BY_1000 0x4000
186 #define CLOCK_ADD_MONOTONIC 0x8000
187
188 ENTRY(fsys_gettimeofday)
189         .prologue
190         .altrp b6
191         .body
            // Light-weight gettimeofday.  r32 is the user pointer that receives
            // the result; r33 is only checked for NaT.  Sets up r31/r30 and
            // falls into the common .gettime code, which fsys_clock_gettime
            // also branches to.  The .fail_einval and .fail_efault exits
            // below are shared with the other handlers in this file.
192         mov r31 = r32
193         tnat.nz p6,p0 = r33             // guard against NaT argument
194 (p6)    br.cond.spnt.few .fail_einval
195         mov r30 = CLOCK_DIVIDE_BY_1000
196         ;;
197 .gettime:
198         // Register map
199         // Incoming r31 = pointer to address where to place result
200         //          r30 = flags determining how time is processed
201         // r2,r3 = temp r4-r7 preserved
202         // r8 = result nanoseconds
203         // r9 = result seconds
204         // r10 = temporary storage for clock difference
205         // r11 = preserved: saved ar.pfs
206         // r12 = preserved: memory stack
207         // r13 = preserved: thread pointer
208         // r14 = address of mask / mask value
209         // r15 = preserved: system call number
210         // r16 = preserved: current task pointer
211         // r17 = (not used)
212         // r18 = (not used)
213         // r19 = address of itc_lastcycle
214         // r20 = struct fsyscall_gtod_data (= address of gtod_lock.sequence)
215         // r21 = address of mmio_ptr
216         // r22 = address of wall_time or monotonic_time
217         // r23 = address of shift / value
218         // r24 = address mult factor / cycle_last value
219         // r25 = itc_lastcycle value
220         // r26 = address clocksource cycle_last
221         // r27 = (not used)
222         // r28 = sequence number at the beginning of critical section
223         // r29 = address of itc_jitter
224         // r30 = time processing flags / memory address
225         // r31 = pointer to result
226         // Predicates
227         // p6,p7 short term use
228         // p8 = timesource ar.itc
229         // p9 = timesource mmio64
230         // p10 = timesource mmio32 - not used
231         // p11 = timesource not to be handled by asm code
232         // p12 = memory time source ( = p9 | p10) - not used
233         // p13 = do cmpxchg with itc_lastcycle
234         // p14 = Divide by 1000
235         // p15 = Add monotonic
236         //
237         // Note that instructions are optimized for McKinley. McKinley can
238         // process two bundles simultaneously and therefore we continuously
239         // try to feed the CPU two bundles and then a stop.
240
241         add r2 = TI_FLAGS+IA64_TASK_SIZE,r16
242         tnat.nz p6,p0 = r31             // guard against Nat argument
243 (p6)    br.cond.spnt.few .fail_einval
244         movl r20 = fsyscall_gtod_data // load fsyscall gettimeofday data address
245         ;;
246         ld4 r2 = [r2]                   // process work pending flags
247         movl r29 = itc_jitter_data      // itc_jitter
248         add r22 = IA64_GTOD_WALL_TIME_OFFSET,r20        // wall_time
249         add r21 = IA64_CLKSRC_MMIO_OFFSET,r20
250         mov pr = r30,0xc000     // Set predicates according to function
251         ;;
252         and r2 = TIF_ALLWORK_MASK,r2
253         add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29
254 (p15)   add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20        // monotonic_time
255         ;;
256         add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20     // clksrc_cycle_last
257         cmp.ne p6, p0 = 0, r2   // Fallback if work is scheduled
258 (p6)    br.cond.spnt.many fsys_fallback_syscall
259         ;;
260         // Begin critical section
261 .time_redo:
262         ld4.acq r28 = [r20]     // gtod_lock.sequence, Must take first
263         ;;
264         and r28 = ~1,r28        // And make sequence even to force retry if odd
265         ;;
266         ld8 r30 = [r21]         // clocksource->mmio_ptr
267         add r24 = IA64_CLKSRC_MULT_OFFSET,r20
268         ld4 r2 = [r29]          // itc_jitter value
269         add r23 = IA64_CLKSRC_SHIFT_OFFSET,r20
270         add r14 = IA64_CLKSRC_MASK_OFFSET,r20
271         ;;
272         ld4 r3 = [r24]          // clocksource mult value
273         ld8 r14 = [r14]         // clocksource mask value
274         cmp.eq p8,p9 = 0,r30    // use cpu timer if no mmio_ptr
275         ;;
276         setf.sig f7 = r3        // Setup for mult scaling of counter
277 (p8)    cmp.ne p13,p0 = r2,r0   // need itc_jitter compensation, set p13
278         ld4 r23 = [r23]         // clocksource shift value
279         ld8 r24 = [r26]         // get clksrc_cycle_last value
280 (p9)    cmp.eq p13,p0 = 0,r30   // if mmio_ptr, clear p13 jitter control
281         ;;
282         .pred.rel.mutex p8,p9
283         MOV_FROM_ITC(p8, p6, r2, r10)   // CPU_TIMER. 36 clocks latency!!!
284 (p9)    ld8 r2 = [r30]          // MMIO_TIMER. Could also have latency issues..
285 (p13)   ld8 r25 = [r19]         // get itc_lastcycle value
286         ld8 r9 = [r22],IA64_TIMESPEC_TV_NSEC_OFFSET     // tv_sec
287         ;;
288         ld8 r8 = [r22],-IA64_TIMESPEC_TV_NSEC_OFFSET    // tv_nsec
289 (p13)   sub r3 = r25,r2         // Diff needed before comparison (thanks davidm)
290         ;;
291 (p13)   cmp.gt.unc p6,p7 = r3,r0 // check if it is less than last. p6,p7 cleared
292         sub r10 = r2,r24        // current_cycle - last_cycle
293         ;;
294 (p6)    sub r10 = r25,r24       // time we got was less than last_cycle
295 (p7)    mov ar.ccv = r25        // more than last_cycle. Prep for cmpxchg
296         ;;
297 (p7)    cmpxchg8.rel r3 = [r19],r2,ar.ccv
298         ;;
299 (p7)    cmp.ne p7,p0 = r25,r3   // if cmpxchg not successful
300         ;;
301 (p7)    sub r10 = r3,r24        // then use new last_cycle instead
302         ;;
303         and r10 = r10,r14       // Apply mask
304         ;;
305         setf.sig f8 = r10
306         nop.i 123
307         ;;
308         // fault check takes 5 cycles and we have spare time
309 EX(.fail_efault, probe.w.fault r31, 3)
310         xmpy.l f8 = f8,f7       // nsec_per_cyc*(counter-last_counter)
311         ;;
312         getf.sig r2 = f8
313         mf
314         ;;
315         ld4 r10 = [r20]         // gtod_lock.sequence
316         shr.u r2 = r2,r23       // shift by factor
317         ;;
318         add r8 = r8,r2          // Add xtime.nsecs
319         cmp4.ne p7,p0 = r28,r10
320 (p7)    br.cond.dpnt.few .time_redo     // sequence number changed, redo
321         // End critical section.
322         // Now r8=tv->tv_nsec and r9=tv->tv_sec
323         mov r10 = r0
324         movl r2 = 1000000000
325         add r23 = IA64_TIMESPEC_TV_NSEC_OFFSET, r31
326 (p14)   movl r3 = 2361183241434822607   // Prep for / 1000 hack
327         ;;
328 .time_normalize:
329         mov r21 = r8
330         cmp.ge p6,p0 = r8,r2
331 (p14)   shr.u r20 = r8, 3 // We can repeat this if necessary just wasting time
332         ;;
333 (p14)   setf.sig f8 = r20
334 (p6)    sub r8 = r8,r2
335 (p6)    add r9 = 1,r9           // two nops before the branch.
336 (p14)   setf.sig f7 = r3        // Chances for repeats are 1 in 10000 for gettod
337 (p6)    br.cond.dpnt.few .time_normalize
338         ;;
339         // Divided by 8 through shift. Now divide by 125
340         // The compiler was able to do that with a multiply
341         // and a shift and we do the same
342 EX(.fail_efault, probe.w.fault r23, 3)  // This also costs 5 cycles
343 (p14)   xmpy.hu f8 = f8, f7             // xmpy has 5 cycles latency so use it
344         ;;
345 (p14)   getf.sig r2 = f8
346         ;;
347         mov r8 = r0                     // return value 0
348 (p14)   shr.u r21 = r2, 4
349         ;;
350 EX(.fail_efault, st8 [r31] = r9)        // store seconds at the result pointer
351 EX(.fail_efault, st8 [r23] = r21)       // store r21 (nsec, or nsec/1000 when p14) in the second field
352         FSYS_RETURN
353 .fail_einval:
354         mov r8 = EINVAL
355         mov r10 = -1
356         FSYS_RETURN
357 .fail_efault:
358         mov r8 = EFAULT
359         mov r10 = -1
360         FSYS_RETURN
361 END(fsys_gettimeofday)
362
363 ENTRY(fsys_clock_gettime)
364         .prologue
365         .altrp b6
366         .body
367         cmp4.ltu p6, p0 = CLOCK_MONOTONIC, r32  // p6 = clock id (r32) above CLOCK_MONOTONIC
368         // Only CLOCK_REALTIME and CLOCK_MONOTONIC are handled here; punt on everything else
369 (p6)    br.spnt.few fsys_fallback_syscall
370         shl r30 = r32,15                        // r30 = flags: 0, or CLOCK_ADD_MONOTONIC for clock id 1
371         mov r31 = r33                           // r31 = user pointer that receives the result
372         br.many .gettime                        // continue in the code shared with fsys_gettimeofday
373 END(fsys_clock_gettime)
374
375 /*
376  * long fsys_rt_sigprocmask (int how, sigset_t *set, sigset_t *oset, size_t sigsetsize).
377  */
378 #if _NSIG_WORDS != 1
379 # error Sorry, fsys_rt_sigprocmask() needs to be updated for _NSIG_WORDS != 1.
380 #endif
381 ENTRY(fsys_rt_sigprocmask)
382         .prologue
383         .altrp b6
384         .body
385         // In:  r32 = how, r33 = set, r34 = oset, r35 = sigsetsize.
            // Out: r8 = 0 on success; the mask that was in current->blocked is
            //      stored to *oset when oset != NULL.
            // Fails with EINVAL for a bad "how"/sigsetsize or a NaT how/sigsetsize,
            // with EFAULT for NaT or inaccessible set/oset, and branches to
            // fsys_fallback_syscall when work is pending, when siglock is
            // contended, or when the new mask would leave a signal deliverable.
            //
            // r3 carries the old mask all the way to .store_mask, so nothing on
            // the success path may use r3 as a scratch register.
386         add r2=IA64_TASK_BLOCKED_OFFSET,r16
387         add r9=TI_FLAGS+IA64_TASK_SIZE,r16
388         cmp4.ltu p6,p0=SIG_SETMASK,r32
389
390         cmp.ne p15,p0=r0,r34                    // oset != NULL?
391         tnat.nz p8,p0=r34
392         add r31=IA64_TASK_SIGHAND_OFFSET,r16
393         ;;
394         ld8 r3=[r2]                             // read/prefetch current->blocked
395         ld4 r9=[r9]
396         tnat.nz.or p6,p0=r35
397
398         cmp.ne.or p6,p0=_NSIG_WORDS*8,r35
399         tnat.nz.or p6,p0=r32
400 (p6)    br.spnt.few .fail_einval                // fail with EINVAL
401         ;;
402 #ifdef CONFIG_SMP
403         ld8 r31=[r31]                           // r31 <- current->sighand
404 #endif
405         and r9=TIF_ALLWORK_MASK,r9
406         tnat.nz.or p8,p0=r33
407         ;;
408         cmp.ne p7,p0=0,r9
409         cmp.eq p6,p0=r0,r33                     // set == NULL?
410         add r31=IA64_SIGHAND_SIGLOCK_OFFSET,r31 // r31 <- current->sighand->siglock
411 (p8)    br.spnt.few .fail_efault                // fail with EFAULT
412 (p7)    br.spnt.many fsys_fallback_syscall      // got pending kernel work...
413 (p6)    br.dpnt.many .store_mask                // -> short-circuit to just reading the signal mask
414
415         /* Argh, we actually have to do some work and _update_ the signal mask: */
416
417 EX(.fail_efault, probe.r.fault r33, 3)          // verify user has read-access to *set
418 EX(.fail_efault, ld8 r14=[r33])                 // r14 <- *set
419         mov r17=(1 << (SIGKILL - 1)) | (1 << (SIGSTOP - 1))
420         ;;
421
422         RSM_PSR_I(p0, r18, r19)                 // mask interrupt delivery
423         mov ar.ccv=0
424         andcm r14=r14,r17                       // filter out SIGKILL & SIGSTOP
425
426 #ifdef CONFIG_SMP
427         // __ticket_spin_trylock(r31)
428         ld4 r17=[r31]                   // r17 <- current lock word
429         mov r8=EINVAL                   // default to EINVAL
430         ;;
431         extr r9=r17,17,15               // r9 <- bits 17..31 of the lock word
432         ;;
433         xor r18=r17,r9
434         adds r19=1,r17                  // r19 <- lock word + 1 (value to install)
435         ;;
436         extr.u r18=r18,0,15             // r18 == 0 iff bits 0..14 match bits 17..31
437         ;;
438         cmp.eq p0,p7=0,r18              // p7 <- the two fields differ: lock is held
439 (p7)    br.cond.spnt.many .lock_contention
440         mov.m ar.ccv=r17
441         ;;
442         cmpxchg4.acq r9=[r31],r19,ar.ccv        // install r19 if the lock word is still r17
443         ;;
444         cmp4.eq p0,p7=r9,r17            // p7 <- somebody else changed the lock word first
445 (p7)    br.cond.spnt.many .lock_contention
446         ld8 r3=[r2]                     // re-read current->blocked now that we hold the lock
447         ;;
448 #else
449         ld8 r3=[r2]                     // re-read current->blocked now that we hold the lock
450         mov r8=EINVAL                   // default to EINVAL
451 #endif
452         add r18=IA64_TASK_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r16
453         add r19=IA64_TASK_SIGNAL_OFFSET,r16
454         cmp4.eq p6,p0=SIG_BLOCK,r32
455         ;;
456         ld8 r19=[r19]                   // r19 <- current->signal
457         cmp4.eq p7,p0=SIG_UNBLOCK,r32
458         cmp4.eq p8,p0=SIG_SETMASK,r32
459         ;;
460         ld8 r18=[r18]                   // r18 <- current->pending.signal
461         .pred.rel.mutex p6,p7,p8
462 (p6)    or r14=r3,r14                   // SIG_BLOCK
463 (p7)    andcm r14=r3,r14                // SIG_UNBLOCK
464
465 (p8)    mov r14=r14                     // SIG_SETMASK
466 (p6)    mov r8=0                        // clear error code
467         // recalc_sigpending()
468         add r17=IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,r19
469
470         add r19=IA64_SIGNAL_SHARED_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r19
471         ;;
472         ld4 r17=[r17]           // r17 <- current->signal->group_stop_count
473 (p7)    mov r8=0                // clear error code
474
475         ld8 r19=[r19]           // r19 <- current->signal->shared_pending
476         ;;
477         cmp4.gt p6,p7=r17,r0    // p6/p7 <- (current->signal->group_stop_count > 0)?
478 (p8)    mov r8=0                // clear error code
479
480         or r18=r18,r19          // r18 <- current->pending | current->signal->shared_pending
481         ;;
482         // r18 <- (current->pending | current->signal->shared_pending) & ~current->blocked:
483         andcm r18=r18,r14
484         add r9=TI_FLAGS+IA64_TASK_SIZE,r16
485         ;;
486
487 (p7)    cmp.ne.or.andcm p6,p7=r18,r0            // p6/p7 <- signal pending
488         mov r19=0                                       // i must not leak kernel bits...
489 (p6)    br.cond.dpnt.many .sig_pending
490         ;;
491
492 1:      ld4 r17=[r9]                            // r17 <- current->thread_info->flags
493         ;;
494         mov ar.ccv=r17
495         and r18=~_TIF_SIGPENDING,r17            // r18 <- r17 & ~(1 << TIF_SIGPENDING)
496         ;;
497
498         st8 [r2]=r14                            // update current->blocked with new mask
499         cmpxchg4.acq r8=[r9],r18,ar.ccv         // current->thread_info->flags <- r18
500         ;;
501         cmp.ne p6,p0=r17,r8                     // update failed?
502 (p6)    br.cond.spnt.few 1b                     // yes -> retry
503
504 #ifdef CONFIG_SMP
505         // __ticket_spin_unlock(r31)
            // Scratch registers here are r2 and r17 only: r3 still holds the old
            // signal mask that .store_mask writes to *oset, so it must survive.
506         adds r31=2,r31                  // r31 <- address of the upper halfword of the lock
507         ;;
508         ld2.bias r2=[r31]
509         mov r17=65534                   // 0xfffe: mask that clears bit 0 of the halfword
510         ;;
511         adds r2=2,r2                    // advance the upper halfword by 2
512         ;;
513         and r17=r17,r2
514         ;;
515         st2.rel [r31]=r17               // release the lock
516 #endif
517         SSM_PSR_I(p0, p9, r31)
518         ;;
519
520         srlz.d                                  // ensure psr.i is set again
521         mov r18=0                                       // i must not leak kernel bits...
522
523 .store_mask:
524 EX(.fail_efault, (p15) probe.w.fault r34, 3)    // verify user has write-access to *oset
525 EX(.fail_efault, (p15) st8 [r34]=r3)            // *oset <- old current->blocked
526         mov r2=0                                        // i must not leak kernel bits...
527         mov r3=0                                        // i must not leak kernel bits...
528         mov r8=0                                // return 0
529         mov r9=0                                        // i must not leak kernel bits...
530         mov r14=0                                       // i must not leak kernel bits...
531         mov r17=0                                       // i must not leak kernel bits...
532         mov r31=0                                       // i must not leak kernel bits...
533         FSYS_RETURN
534
535 .sig_pending:
536 #ifdef CONFIG_SMP
537         // __ticket_spin_unlock(r31)
            // r2/r3 may be clobbered on this path: it never reaches .store_mask,
            // it always ends in fsys_fallback_syscall.
538         adds r31=2,r31
539         ;;
540         ld2.bias r2=[r31]
541         mov r3=65534
542         ;;
543         adds r2=2,r2
544         ;;
545         and r3=r3,r2
546         ;;
547         st2.rel [r31]=r3
548 #endif
549         SSM_PSR_I(p0, p9, r17)
550         ;;
551         srlz.d
552         br.sptk.many fsys_fallback_syscall      // with signal pending, do the heavy-weight syscall
553
554 #ifdef CONFIG_SMP
555 .lock_contention:
556         /* Rather than spinning here, fall back on doing a heavy-weight syscall.  */
557         SSM_PSR_I(p0, p9, r17)
558         ;;
559         srlz.d
560         br.sptk.many fsys_fallback_syscall
561 #endif
562 END(fsys_rt_sigprocmask)
563
564 /*
565  * fsys_getcpu doesn't use the third parameter in this implementation. It reads
566  * current_thread_info()->cpu and corresponding node in cpu_to_node_map.
567  */
568 ENTRY(fsys_getcpu)
569         .prologue
570         .altrp b6
571         .body
            // In:  r32 = user pointer for the cpu number, r33 = user pointer for
            //      the node number (either may be NULL, in which case that
            //      value is simply not written).
            // Out: r8 = 0 on success.  NaT pointers fail with EINVAL, pointers
            //      that are not writable fail with EFAULT, and pending work
            //      (TIF_ALLWORK_MASK) sends us to fsys_fallback_syscall.
            // Both results are written as 32-bit values (st4), assuming the
            // caller passes pointers to 32-bit variables as getcpu(2) specifies.
572         ;;
573         add r2=TI_FLAGS+IA64_TASK_SIZE,r16
574         tnat.nz p6,p0 = r32                     // guard against NaT argument
575         add r3=TI_CPU+IA64_TASK_SIZE,r16
576         ;;
577         ld4 r3=[r3]                             // M r3 = thread_info->cpu
578         ld4 r2=[r2]                             // M r2 = thread_info->flags
579 (p6)    br.cond.spnt.few .fail_einval           // B
580         ;;
581         tnat.nz p7,p0 = r33                     // I guard against NaT argument
582 (p7)    br.cond.spnt.few .fail_einval           // B
            ;;
            cmp.ne p6,p0=r32,r0                     // p6 = (cpu pointer != NULL)
            cmp.ne p7,p0=r33,r0                     // p7 = (node pointer != NULL)
            ;;
583 #ifdef CONFIG_NUMA
584         movl r17=cpu_to_node_map
585         ;;
586 EX(.fail_efault, (p6) probe.w.fault r32, 3)     // M This takes 5 cycles
587 EX(.fail_efault, (p7) probe.w.fault r33, 3)     // M This takes 5 cycles
588         shladd r18=r3,1,r17                     // r18 = &cpu_to_node_map[cpu] (2-byte entries)
589         ;;
590         ld2 r20=[r18]                           // r20 = cpu_to_node_map[cpu]
591         and r2 = TIF_ALLWORK_MASK,r2
592         ;;
593         cmp.ne p8,p0=0,r2                       // p8 = work pending
594 (p8)    br.spnt.many fsys_fallback_syscall
595         ;;
596         ;;
597 EX(.fail_efault, (p6) st4 [r32] = r3)           // *cpu = thread_info->cpu
598 EX(.fail_efault, (p7) st4 [r33] = r20)          // *node = node number (ld2 already zero-extended it)
599         mov r8=0
600         ;;
601 #else
602 EX(.fail_efault, (p6) probe.w.fault r32, 3)     // M This takes 5 cycles
603 EX(.fail_efault, (p7) probe.w.fault r33, 3)     // M This takes 5 cycles
604         and r2 = TIF_ALLWORK_MASK,r2
605         ;;
606         cmp.ne p8,p0=0,r2                       // p8 = work pending
607 (p8)    br.spnt.many fsys_fallback_syscall
608         ;;
609 EX(.fail_efault, (p6) st4 [r32] = r3)           // *cpu = thread_info->cpu
610 EX(.fail_efault, (p7) st4 [r33] = r0)           // *node = 0 when CONFIG_NUMA is off
611         mov r8=0
612         ;;
613 #endif
614         FSYS_RETURN
615 END(fsys_getcpu)
616
617 ENTRY(fsys_fallback_syscall)
618         .prologue
619         .altrp b6
620         .body
621         /*
622          * We only get here from light-weight syscall handlers.  Thus, we already
623          * know that r15 contains a valid syscall number.  No need to re-check.
624          *
             * This code looks up the heavy-weight handler in sys_call_table and
             * leaves the values paravirt_fsys_bubble_down expects in
             * r18/r21/r26/r27/r29, then falls through into it (there is no
             * branch or return at the end of this block).
625          */
625         adds r17=-1024,r15                      // r17 = syscall number - 1024 = table index
626         movl r14=sys_call_table
627         ;;
628         RSM_PSR_I(p0, r26, r27)                 // mask interrupt delivery
629         shladd r18=r17,3,r14                    // r18 = &sys_call_table[r17] (8-byte entries)
630         ;;
631         ld8 r18=[r18]                           // load normal (heavy-weight) syscall entry-point
632         MOV_FROM_PSR(p0, r29, r26)              // read psr (12 cyc load latency)
633         mov r27=ar.rsc
634         mov r21=ar.fpsr
635         mov r26=ar.pfs
636 END(fsys_fallback_syscall)
637         /* FALL THROUGH */
638 GLOBAL_ENTRY(paravirt_fsys_bubble_down)
639         .prologue
640         .altrp b6
641         .body
642         /*
643          * We get here for syscalls that don't have a lightweight
644          * handler.  For those, we need to bubble down into the kernel
645          * and that requires setting up a minimal pt_regs structure,
646          * and initializing the CPU state more or less as if an
647          * interruption had occurred.  To make syscall-restarts work,
648          * we setup pt_regs such that cr_iip points to the second
649          * instruction in syscall_via_break.  Decrementing the IP
650          * hence will restart the syscall via break and not
651          * decrementing IP will return us to the caller, as usual.
652          * Note that we preserve the value of psr.pp rather than
653          * initializing it from dcr.pp.  This makes it possible to
654          * distinguish fsyscall execution from other privileged
655          * execution.
656          *
657          * On entry:
658          *      - normal fsyscall handler register usage, except
659          *        that we also have:
660          *      - r18: address of syscall entry point
661          *      - r21: ar.fpsr
662          *      - r26: ar.pfs
663          *      - r27: ar.rsc
664          *      - r29: psr
665          *
666          * We used to clear some PSR bits here but that requires slow
667          * serialization.  Fortunately, that isn't really necessary.
668          * The rationale is as follows: we used to clear bits
669          * ~PSR_PRESERVED_BITS in PSR.L.  Since
670          * PSR_PRESERVED_BITS==PSR.{UP,MFL,MFH,PK,DT,PP,SP,RT,IC}, we
671          * ended up clearing PSR.{BE,AC,I,DFL,DFH,DI,DB,SI,TB}.
672          * However,
673          *
674          * PSR.BE : already is turned off in __kernel_syscall_via_epc()
675          * PSR.AC : don't care (kernel normally turns PSR.AC on)
676          * PSR.I  : already turned off by the time paravirt_fsys_bubble_down gets
677          *          invoked
678          * PSR.DFL: always 0 (kernel never turns it on)
679          * PSR.DFH: don't care --- kernel never touches f32-f127 on its own
680          *          initiative
681          * PSR.DI : always 0 (kernel never turns it on)
682          * PSR.SI : always 0 (kernel never turns it on)
683          * PSR.DB : don't care --- kernel never enables kernel-level
684          *          breakpoints
685          * PSR.TB : must be 0 already; if it wasn't zero on entry to
686          *          __kernel_syscall_via_epc, the branch to paravirt_fsys_bubble_down
687          *          will trigger a taken branch; the taken-trap-handler then
688          *          converts the syscall into a break-based system-call.
689          */
690         /*
691          * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc.
692          * The rest we have to synthesize.
693          */
694 #       define PSR_ONE_BITS             ((3 << IA64_PSR_CPL0_BIT)       \
695                                          | (0x1 << IA64_PSR_RI_BIT)     \
696                                          | IA64_PSR_BN | IA64_PSR_I)
697
698         invala                                  // M0|1
699         movl r14=ia64_ret_from_syscall          // X
700
701         nop.m 0
702         movl r28=__kernel_syscall_via_break     // X    create cr.iip
703         ;;
704
705         mov r2=r16                              // A    get task addr to addl-addressable register
706         adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // A
707         mov r31=pr                              // I0   save pr (2 cyc)
708         ;;
709         st1 [r16]=r0                            // M2|3 clear current->thread.on_ustack flag
710         addl r22=IA64_RBS_OFFSET,r2             // A    compute base of RBS
711         add r3=TI_FLAGS+IA64_TASK_SIZE,r2       // A
712         ;;
713         ld4 r3=[r3]                             // M0|1 r3 = current_thread_info()->flags
714         lfetch.fault.excl.nt1 [r22]             // M0|1 prefetch register backing-store
715         nop.i 0
716         ;;
717         mov ar.rsc=0                            // M2   set enforced lazy mode, pl 0, LE, loadrs=0
718 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
719         MOV_FROM_ITC(p0, p6, r30, r23)          // M    get cycle for accounting
720 #else
721         nop.m 0
722 #endif
723         nop.i 0
724         ;;
725         mov r23=ar.bspstore                     // M2 (12 cyc) save ar.bspstore
726         mov.m r24=ar.rnat                       // M2 (5 cyc) read ar.rnat (dual-issues!)
727         nop.i 0
728         ;;
729         mov ar.bspstore=r22                     // M2 (6 cyc) switch to kernel RBS
730         movl r8=PSR_ONE_BITS                    // X
731         ;;
732         mov r25=ar.unat                         // M2 (5 cyc) save ar.unat
733         mov r19=b6                              // I0   save b6 (2 cyc)
734         mov r20=r1                              // A    save caller's gp in r20
735         ;;
736         or r29=r8,r29                           // A    construct cr.ipsr value to save
737         mov b6=r18                              // I0   copy syscall entry-point to b6 (7 cyc)
738         addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // A compute base of memory stack
739
740         mov r18=ar.bsp                          // M2   save (kernel) ar.bsp (12 cyc)
741         cmp.ne pKStk,pUStk=r0,r0                // A    set pKStk <- 0, pUStk <- 1
742         br.call.sptk.many b7=ia64_syscall_setup // B
743         ;;
744 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
745         // mov.m r30=ar.itc is called in advance
            // r30 = cycle count sampled above; fold the elapsed user and system
            // time into the TI_AC_* accounting fields of the thread info.
746         add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2
747         add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2
748         ;;
749         ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP   // time at last check in kernel
750         ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE   // time at leave kernel
751         ;;
752         ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME   // cumulated stime
753         ld8 r21=[r17]                           // cumulated utime
754         sub r22=r19,r18                         // stime before leave kernel
755         ;;
756         st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP   // update stamp
757         sub r18=r30,r19                         // elapsed time in user mode
758         ;;
759         add r20=r20,r22                         // sum stime
760         add r21=r21,r18                         // sum utime
761         ;;
762         st8 [r16]=r20                           // update stime
763         st8 [r17]=r21                           // update utime
764         ;;
765 #endif
766         mov ar.rsc=0x3                          // M2   set eager mode, pl 0, LE, loadrs=0
767         mov rp=r14                              // I0   set the real return addr
768         and r3=_TIF_SYSCALL_TRACEAUDIT,r3       // A    keep only the syscall trace/audit flags
769         ;;
770         SSM_PSR_I(p0, p6, r22)                  // M2   we're on kernel stacks now, reenable irqs
771         cmp.eq p8,p0=r3,r0                      // A    p8 <- no trace/audit flag is set
772 (p10)   br.cond.spnt.many ia64_ret_from_syscall // B    return if bad call-frame or r15 is a NaT
773
774         nop.m 0
775 (p8)    br.call.sptk.many b6=b6                 // B    (ignore return address)
776         br.cond.spnt ia64_trace_syscall         // B    trace/audit flag set: take the traced path
777 END(paravirt_fsys_bubble_down)
778
/*
 * paravirt_fsyscall_table: table of light-weight system call (fsyscall) handlers.
 *
 * The table is emitted into .rodata, aligned to 8 bytes and exported with .globl.
 * It consists of one data8 (8-byte) slot per system call.  According to the
 * numbering comments on the entries, slot 0 corresponds to syscall number 1024,
 * i.e. slot index = syscall number - 1024.
 *
 * Slots that name a handler in this table:
 *      1024  fsys_ni_syscall
 *      1041  fsys_getpid
 *      1042  fsys_getppid
 *      1087  fsys_gettimeofday
 *      1179  fsys_rt_sigprocmask
 *      1233  fsys_set_tid_address
 *      1254  fsys_clock_gettime
 *      1304  fsys_getcpu
 * All other slots hold 0.
 * NOTE(review): the code that indexes this table is not visible in this part of
 * the file; presumably a 0 slot means "no fsyscall handler, take the regular
 * system call path" -- confirm against the dispatcher.
 *
 * The word stored immediately before the table label (at
 * paravirt_fsyscall_table - 8) holds the address of paravirt_fsys_bubble_down.
 * NOTE(review): presumably the dispatcher loads it relative to the table base;
 * how it is consumed is not shown here -- confirm against the caller.
 *
 * Entries are purely positional: inserting, removing or reordering a data8 line
 * shifts every following slot to a different syscall number.
 */
779         .rodata
780         .align 8
781         .globl paravirt_fsyscall_table
782
783         data8 paravirt_fsys_bubble_down
784 paravirt_fsyscall_table:
785         data8 fsys_ni_syscall
786         data8 0                         // exit                 // 1025
787         data8 0                         // read
788         data8 0                         // write
789         data8 0                         // open
790         data8 0                         // close
791         data8 0                         // creat                // 1030
792         data8 0                         // link
793         data8 0                         // unlink
794         data8 0                         // execve
795         data8 0                         // chdir
796         data8 0                         // fchdir               // 1035
797         data8 0                         // utimes
798         data8 0                         // mknod
799         data8 0                         // chmod
800         data8 0                         // chown
801         data8 0                         // lseek                // 1040
802         data8 fsys_getpid               // getpid
803         data8 fsys_getppid              // getppid
804         data8 0                         // mount
805         data8 0                         // umount
806         data8 0                         // setuid               // 1045
807         data8 0                         // getuid
808         data8 0                         // geteuid
809         data8 0                         // ptrace
810         data8 0                         // access
811         data8 0                         // sync                 // 1050
812         data8 0                         // fsync
813         data8 0                         // fdatasync
814         data8 0                         // kill
815         data8 0                         // rename
816         data8 0                         // mkdir                // 1055
817         data8 0                         // rmdir
818         data8 0                         // dup
819         data8 0                         // pipe
820         data8 0                         // times
821         data8 0                         // brk                  // 1060
822         data8 0                         // setgid
823         data8 0                         // getgid
824         data8 0                         // getegid
825         data8 0                         // acct
826         data8 0                         // ioctl                // 1065
827         data8 0                         // fcntl
828         data8 0                         // umask
829         data8 0                         // chroot
830         data8 0                         // ustat
831         data8 0                         // dup2                 // 1070
832         data8 0                         // setreuid
833         data8 0                         // setregid
834         data8 0                         // getresuid
835         data8 0                         // setresuid
836         data8 0                         // getresgid            // 1075
837         data8 0                         // setresgid
838         data8 0                         // getgroups
839         data8 0                         // setgroups
840         data8 0                         // getpgid
841         data8 0                         // setpgid              // 1080
842         data8 0                         // setsid
843         data8 0                         // getsid
844         data8 0                         // sethostname
845         data8 0                         // setrlimit
846         data8 0                         // getrlimit            // 1085
847         data8 0                         // getrusage
848         data8 fsys_gettimeofday         // gettimeofday
849         data8 0                         // settimeofday
850         data8 0                         // select
851         data8 0                         // poll                 // 1090
852         data8 0                         // symlink
853         data8 0                         // readlink
854         data8 0                         // uselib
855         data8 0                         // swapon
856         data8 0                         // swapoff              // 1095
857         data8 0                         // reboot
858         data8 0                         // truncate
859         data8 0                         // ftruncate
860         data8 0                         // fchmod
861         data8 0                         // fchown               // 1100
862         data8 0                         // getpriority
863         data8 0                         // setpriority
864         data8 0                         // statfs
865         data8 0                         // fstatfs
866         data8 0                         // gettid               // 1105
867         data8 0                         // semget
868         data8 0                         // semop
869         data8 0                         // semctl
870         data8 0                         // msgget
871         data8 0                         // msgsnd               // 1110
872         data8 0                         // msgrcv
873         data8 0                         // msgctl
874         data8 0                         // shmget
875         data8 0                         // shmat
876         data8 0                         // shmdt                // 1115
877         data8 0                         // shmctl
878         data8 0                         // syslog
879         data8 0                         // setitimer
880         data8 0                         // getitimer
881         data8 0                                                 // 1120
882         data8 0
883         data8 0
884         data8 0                         // vhangup
885         data8 0                         // lchown
886         data8 0                         // remap_file_pages     // 1125
887         data8 0                         // wait4
888         data8 0                         // sysinfo
889         data8 0                         // clone
890         data8 0                         // setdomainname
891         data8 0                         // newuname             // 1130
892         data8 0                         // adjtimex
893         data8 0
894         data8 0                         // init_module
895         data8 0                         // delete_module
896         data8 0                                                 // 1135
897         data8 0
898         data8 0                         // quotactl
899         data8 0                         // bdflush
900         data8 0                         // sysfs
901         data8 0                         // personality          // 1140
902         data8 0                         // afs_syscall
903         data8 0                         // setfsuid
904         data8 0                         // setfsgid
905         data8 0                         // getdents
906         data8 0                         // flock                // 1145
907         data8 0                         // readv
908         data8 0                         // writev
909         data8 0                         // pread64
910         data8 0                         // pwrite64
911         data8 0                         // sysctl               // 1150
912         data8 0                         // mmap
913         data8 0                         // munmap
914         data8 0                         // mlock
915         data8 0                         // mlockall
916         data8 0                         // mprotect             // 1155
917         data8 0                         // mremap
918         data8 0                         // msync
919         data8 0                         // munlock
920         data8 0                         // munlockall
921         data8 0                         // sched_getparam       // 1160
922         data8 0                         // sched_setparam
923         data8 0                         // sched_getscheduler
924         data8 0                         // sched_setscheduler
925         data8 0                         // sched_yield
926         data8 0                         // sched_get_priority_max       // 1165
927         data8 0                         // sched_get_priority_min
928         data8 0                         // sched_rr_get_interval
929         data8 0                         // nanosleep
930         data8 0                         // nfsservctl
931         data8 0                         // prctl                // 1170
932         data8 0                         // getpagesize
933         data8 0                         // mmap2
934         data8 0                         // pciconfig_read
935         data8 0                         // pciconfig_write
936         data8 0                         // perfmonctl           // 1175
937         data8 0                         // sigaltstack
938         data8 0                         // rt_sigaction
939         data8 0                         // rt_sigpending
940         data8 fsys_rt_sigprocmask       // rt_sigprocmask
941         data8 0                         // rt_sigqueueinfo      // 1180
942         data8 0                         // rt_sigreturn
943         data8 0                         // rt_sigsuspend
944         data8 0                         // rt_sigtimedwait
945         data8 0                         // getcwd
946         data8 0                         // capget               // 1185
947         data8 0                         // capset
948         data8 0                         // sendfile
949         data8 0
950         data8 0
951         data8 0                         // socket               // 1190
952         data8 0                         // bind
953         data8 0                         // connect
954         data8 0                         // listen
955         data8 0                         // accept
956         data8 0                         // getsockname          // 1195
957         data8 0                         // getpeername
958         data8 0                         // socketpair
959         data8 0                         // send
960         data8 0                         // sendto
961         data8 0                         // recv                 // 1200
962         data8 0                         // recvfrom
963         data8 0                         // shutdown
964         data8 0                         // setsockopt
965         data8 0                         // getsockopt
966         data8 0                         // sendmsg              // 1205
967         data8 0                         // recvmsg
968         data8 0                         // pivot_root
969         data8 0                         // mincore
970         data8 0                         // madvise
971         data8 0                         // newstat              // 1210
972         data8 0                         // newlstat
973         data8 0                         // newfstat
974         data8 0                         // clone2
975         data8 0                         // getdents64
976         data8 0                         // getunwind            // 1215
977         data8 0                         // readahead
978         data8 0                         // setxattr
979         data8 0                         // lsetxattr
980         data8 0                         // fsetxattr
981         data8 0                         // getxattr             // 1220
982         data8 0                         // lgetxattr
983         data8 0                         // fgetxattr
984         data8 0                         // listxattr
985         data8 0                         // llistxattr
986         data8 0                         // flistxattr           // 1225
987         data8 0                         // removexattr
988         data8 0                         // lremovexattr
989         data8 0                         // fremovexattr
990         data8 0                         // tkill
991         data8 0                         // futex                // 1230
992         data8 0                         // sched_setaffinity
993         data8 0                         // sched_getaffinity
994         data8 fsys_set_tid_address      // set_tid_address
995         data8 0                         // fadvise64_64
996         data8 0                         // tgkill               // 1235
997         data8 0                         // exit_group
998         data8 0                         // lookup_dcookie
999         data8 0                         // io_setup
1000         data8 0                         // io_destroy
1001         data8 0                         // io_getevents         // 1240
1002         data8 0                         // io_submit
1003         data8 0                         // io_cancel
1004         data8 0                         // epoll_create
1005         data8 0                         // epoll_ctl
1006         data8 0                         // epoll_wait           // 1245
1007         data8 0                         // restart_syscall
1008         data8 0                         // semtimedop
1009         data8 0                         // timer_create
1010         data8 0                         // timer_settime
1011         data8 0                         // timer_gettime        // 1250
1012         data8 0                         // timer_getoverrun
1013         data8 0                         // timer_delete
1014         data8 0                         // clock_settime
1015         data8 fsys_clock_gettime        // clock_gettime
1016         data8 0                         // clock_getres         // 1255
1017         data8 0                         // clock_nanosleep
1018         data8 0                         // fstatfs64
1019         data8 0                         // statfs64
1020         data8 0                         // mbind
1021         data8 0                         // get_mempolicy        // 1260
1022         data8 0                         // set_mempolicy
1023         data8 0                         // mq_open
1024         data8 0                         // mq_unlink
1025         data8 0                         // mq_timedsend
1026         data8 0                         // mq_timedreceive      // 1265
1027         data8 0                         // mq_notify
1028         data8 0                         // mq_getsetattr
1029         data8 0                         // kexec_load
1030         data8 0                         // vserver
1031         data8 0                         // waitid               // 1270
1032         data8 0                         // add_key
1033         data8 0                         // request_key
1034         data8 0                         // keyctl
1035         data8 0                         // ioprio_set
1036         data8 0                         // ioprio_get           // 1275
1037         data8 0                         // move_pages
1038         data8 0                         // inotify_init
1039         data8 0                         // inotify_add_watch
1040         data8 0                         // inotify_rm_watch
1041         data8 0                         // migrate_pages        // 1280
1042         data8 0                         // openat
1043         data8 0                         // mkdirat
1044         data8 0                         // mknodat
1045         data8 0                         // fchownat
1046         data8 0                         // futimesat            // 1285
1047         data8 0                         // newfstatat
1048         data8 0                         // unlinkat
1049         data8 0                         // renameat
1050         data8 0                         // linkat
1051         data8 0                         // symlinkat            // 1290
1052         data8 0                         // readlinkat
1053         data8 0                         // fchmodat
1054         data8 0                         // faccessat
1055         data8 0
1056         data8 0                                                 // 1295
1057         data8 0                         // unshare
1058         data8 0                         // splice
1059         data8 0                         // set_robust_list
1060         data8 0                         // get_robust_list
1061         data8 0                         // sync_file_range      // 1300
1062         data8 0                         // tee
1063         data8 0                         // vmsplice
1064         data8 0
1065         data8 fsys_getcpu               // getcpu               // 1304
1066
1067         // fill in zeros for the remaining entries
             // The ".zero" label below marks the end of the explicitly listed slots.
             // The .space size is (table start + 8*NR_syscalls) - .zero, i.e. the
             // bytes still missing to give the table exactly NR_syscalls 8-byte
             // slots; they are filled with the byte value 0.
1068         .zero:
1069         .space paravirt_fsyscall_table + 8*NR_syscalls - .zero, 0