/*
 * Alpha optimized DSP utils
 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "regdef.h"

/* Some nicer register names.  These are scratch aliases layered on top of
 * the standard Alpha names from regdef.h. */
#define ta t10
#define tb t11
#define tc t12
#define td AT                   /* usable only because of .set noat below */

/* Danger: these overlap with the argument list and the return value */
#define te a5
#define tf a4
#define tg a3
#define th v0

        /* We use AT ($at) as an ordinary scratch register (td above), so
         * tell the assembler not to reserve it for macro expansion.  */
        .set    noat
        /* Instruction order is hand-scheduled; keep it as written.  */
        .set    noreorder
        /* Target pca56 so the MVI extension (perr) is accepted.  For ev6
         * the 3-cycle MVI latency would ideally be rescheduled for.  */
        .arch   pca56
        .text
/*****************************************************************************
 * int pix_abs16x16_mvi_asm(const uint8_t *pix1, const uint8_t *pix2, int line_size)
 *
 * Sum of absolute differences over a 16-pixel-wide block, computed with the
 * MVI "perr" instruction (8 byte-wise |a-b| sums per quadword).
 *
 * NOTE(review): the register usage below reads a1 = pix1, a2 = pix2,
 * a3 = line_size and counts rows down in a4 (h), with a0 untouched until it
 * is reused as scratch — suggesting an extra leading argument and an `h`
 * argument not shown in the prototype above.  Confirm against the C caller.
 *
 * Return: v0 = accumulated SAD.
 *
 * This code is written with a pca56 in mind.  For ev6, one should
 * really take the increased latency of 3 cycles for MVI instructions
 * into account.
 *
 * It is important to keep the loading and first use of a register as
 * far apart as possible, because if a register is accessed before it
 * has been fetched from memory, the CPU will stall.
 */
        .align  4
        .globl  pix_abs16x16_mvi_asm
        .ent    pix_abs16x16_mvi_asm
pix_abs16x16_mvi_asm:
        .frame  sp, 0, ra, 0
        .prologue 0

        and     a2, 7, t0               # t0 = misalignment of pix2 within a quadword
        clr     v0                      # v0 = SAD accumulator (return value)
        beq     t0, $aligned            # fast path when pix2 is 8-byte aligned

        .align  4
$unaligned:
        /* Processes two rows per iteration.  Register roles:
           line 0:
              t0:  left_u  -> left lo  -> left
              t1:  mid
              t2:  right_u -> right hi -> right
              t3:  ref left
              t4:  ref right
           line 1:
              t5:  left_u  -> left lo  -> left
              t6:  mid
              t7:  right_u -> right hi -> right
              t8:  ref left
              t9:  ref right
           temp:
              ta:  left hi
              tb:  right lo
              tc:  error left
              td:  error right  */

        /* load line 0 */
        ldq_u   t0, 0(a2)               # left_u
        ldq_u   t1, 8(a2)               # mid
        ldq_u   t2, 16(a2)              # right_u
        ldq     t3, 0(a1)               # ref left
        ldq     t4, 8(a1)               # ref right
        addq    a1, a3, a1              # pix1 += line_size
        addq    a2, a3, a2              # pix2 += line_size

        /* load line 1 */
        ldq_u   t5, 0(a2)               # left_u
        ldq_u   t6, 8(a2)               # mid
        ldq_u   t7, 16(a2)              # right_u
        ldq     t8, 0(a1)               # ref left
        ldq     t9, 8(a1)               # ref right
        addq    a1, a3, a1              # pix1 += line_size
        addq    a2, a3, a2              # pix2 += line_size

        /* calc line 0: splice the three unaligned quadwords into two
           aligned ones, then accumulate byte-error sums with perr */
        extql   t0, a2, t0              # left lo
        extqh   t1, a2, ta              # left hi
        extql   t1, a2, tb              # right lo
        or      t0, ta, t0              # left
        extqh   t2, a2, t2              # right hi
        perr    t3, t0, tc              # error left
        or      t2, tb, t2              # right
        perr    t4, t2, td              # error right
        addq    v0, tc, v0              # add error left
        addq    v0, td, v0              # add error right

        /* calc line 1 */
        extql   t5, a2, t5              # left lo
        extqh   t6, a2, ta              # left hi
        extql   t6, a2, tb              # right lo
        or      t5, ta, t5              # left
        extqh   t7, a2, t7              # right hi
        perr    t8, t5, tc              # error left
        or      t7, tb, t7              # right
        perr    t9, t7, td              # error right
        addq    v0, tc, v0              # add error left
        addq    v0, td, v0              # add error right

        /* loop */
        subq    a4, 2, a4               # h -= 2 (two rows per pass)
        bne     a4, $unaligned
        ret

        .align  4
$aligned:
        /* Four rows per iteration; all loads are naturally aligned.
           Loads are issued well ahead of their first use (see note above). */

        /* load line 0 */
        ldq     t0, 0(a2)               # left
        ldq     t1, 8(a2)               # right
        addq    a2, a3, a2              # pix2 += line_size
        ldq     t2, 0(a1)               # ref left
        ldq     t3, 8(a1)               # ref right
        addq    a1, a3, a1              # pix1 += line_size

        /* load line 1 */
        ldq     t4, 0(a2)               # left
        ldq     t5, 8(a2)               # right
        addq    a2, a3, a2              # pix2 += line_size
        ldq     t6, 0(a1)               # ref left
        ldq     t7, 8(a1)               # ref right
        addq    a1, a3, a1              # pix1 += line_size

        /* load line 2 */
        ldq     t8, 0(a2)               # left
        ldq     t9, 8(a2)               # right
        addq    a2, a3, a2              # pix2 += line_size
        ldq     ta, 0(a1)               # ref left
        ldq     tb, 8(a1)               # ref right
        addq    a1, a3, a1              # pix1 += line_size

        /* load line 3 — a0 is reused as scratch here (tf would alias a4,
           which still holds the live row counter h) */
        ldq     tc, 0(a2)               # left
        ldq     td, 8(a2)               # right
        addq    a2, a3, a2              # pix2 += line_size
        ldq     te, 0(a1)               # ref left
        ldq     a0, 8(a1)               # ref right

        /* calc line 0 */
        perr    t0, t2, t0              # error left
        addq    a1, a3, a1              # pix1 += line_size
        perr    t1, t3, t1              # error right
        addq    v0, t0, v0              # add error left

        /* calc line 1 */
        perr    t4, t6, t0              # error left
        addq    v0, t1, v0              # add error right
        perr    t5, t7, t1              # error right
        addq    v0, t0, v0              # add error left

        /* calc line 2 */
        perr    t8, ta, t0              # error left
        addq    v0, t1, v0              # add error right
        perr    t9, tb, t1              # error right
        addq    v0, t0, v0              # add error left

        /* calc line 3 */
        perr    tc, te, t0              # error left
        addq    v0, t1, v0              # add error right
        perr    td, a0, t1              # error right
        addq    v0, t0, v0              # add error left
        addq    v0, t1, v0              # add error right

        /* loop */
        subq    a4, 4, a4               # h -= 4 (four rows per pass)
        bne     a4, $aligned
        ret
        .end    pix_abs16x16_mvi_asm