Coverage Report

Created: 2024-09-06 07:53

/src/ffmpeg/libavcodec/x86/lpc_init.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * SIMD-optimized LPC functions
3
 * Copyright (c) 2007 Loren Merritt
4
 *
5
 * This file is part of FFmpeg.
6
 *
7
 * FFmpeg is free software; you can redistribute it and/or
8
 * modify it under the terms of the GNU Lesser General Public
9
 * License as published by the Free Software Foundation; either
10
 * version 2.1 of the License, or (at your option) any later version.
11
 *
12
 * FFmpeg is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
 * Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public
18
 * License along with FFmpeg; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
 */
21
22
#include "libavutil/attributes.h"
23
#include "libavutil/x86/asm.h"
24
#include "libavutil/x86/cpu.h"
25
#include "libavcodec/lpc.h"
26
27
void ff_lpc_apply_welch_window_sse2(const int32_t *data, ptrdiff_t len,
28
                                    double *w_data);
29
void ff_lpc_apply_welch_window_avx2(const int32_t *data, ptrdiff_t len,
30
                                    double *w_data);
31
32
/* 16-byte-aligned packed-double constant { 1.0, 1.0 }; used by the inline
 * asm in lpc_compute_autocorr_sse2() to seed its accumulator registers. */
DECLARE_ASM_CONST(16, double, pd_1)[2] = { 1.0, 1.0 };
33
34
#if HAVE_SSE2_INLINE
35
36
/**
 * Compute autocorrelation coefficients autoc[0..lag] of the windowed samples
 * in data[0..len-1] using SSE2 packed-double inline asm.
 *
 * Two (or, on the final iteration, three) lags are accumulated per pass of
 * the outer loop.  Each accumulator is seeded with 1.0 from pd_1, so every
 * autoc[] entry includes a +1.0 bias term
 * (NOTE(review): presumably matching the scalar reference in lpc.c — confirm).
 *
 * Assumptions not visible from this file (TODO confirm against callers):
 *  - len is even: the inner loop consumes 16 bytes (2 doubles) per step and
 *    terminates only when the negative byte offset reaches exactly 0;
 *  - data points into a buffer with enough slack that skipping one leading
 *    sample for alignment (below) stays in bounds.
 */
static void lpc_compute_autocorr_sse2(const double *data, ptrdiff_t len, int lag,
                                      double *autoc)
{
    int j;

    /* doubles are 8-byte aligned, so if data is not 16-byte aligned,
     * advancing one element makes it aligned for the movapd loads below.
     * This shifts the window by one sample — assumes the caller pads/permits
     * this (TODO confirm). */
    if((x86_reg)data & 15)
        data++;

    for(j=0; j<lag; j+=2){
        /* i counts a negative byte offset up to 0; the asm indexes off
         * data+len (and data+len-j) so the loop ends at the buffer end. */
        x86_reg i = -len*sizeof(double);
        if(j == lag-2) {
            /* Last iteration: compute three lags (j, j+1, j+2) at once.
             * xmm0 accumulates lag j, xmm1 lag j+1, xmm2 lag j+2;
             * all three start at 1.0 (low lane) from pd_1. */
            __asm__ volatile(
                "movsd    "MANGLE(pd_1)", %%xmm0    \n\t"
                "movsd    "MANGLE(pd_1)", %%xmm1    \n\t"
                "movsd    "MANGLE(pd_1)", %%xmm2    \n\t"
                "1:                                 \n\t"
                /* xmm3 = data[i..i+1] (aligned); xmm4/xmm5/mem operands are
                 * the same samples delayed by j+1, j and j+2 respectively. */
                "movapd   (%2,%0), %%xmm3           \n\t"
                "movupd -8(%3,%0), %%xmm4           \n\t"
                "movapd   (%3,%0), %%xmm5           \n\t"
                "mulpd     %%xmm3, %%xmm4           \n\t"
                "mulpd     %%xmm3, %%xmm5           \n\t"
                "mulpd -16(%3,%0), %%xmm3           \n\t"
                "addpd     %%xmm4, %%xmm1           \n\t"
                "addpd     %%xmm5, %%xmm0           \n\t"
                "addpd     %%xmm3, %%xmm2           \n\t"
                /* advance 2 doubles; loop while offset is still negative
                 * (jl uses the flags from this add). */
                "add       $16,    %0               \n\t"
                "jl 1b                              \n\t"
                /* horizontal sum of each accumulator's two lanes,
                 * then store autoc[j], autoc[j+1], autoc[j+2]. */
                "movhlps   %%xmm0, %%xmm3           \n\t"
                "movhlps   %%xmm1, %%xmm4           \n\t"
                "movhlps   %%xmm2, %%xmm5           \n\t"
                "addsd     %%xmm3, %%xmm0           \n\t"
                "addsd     %%xmm4, %%xmm1           \n\t"
                "addsd     %%xmm5, %%xmm2           \n\t"
                "movsd     %%xmm0,   (%1)           \n\t"
                "movsd     %%xmm1,  8(%1)           \n\t"
                "movsd     %%xmm2, 16(%1)           \n\t"
                :"+&r"(i)
                :"r"(autoc+j), "r"(data+len), "r"(data+len-j)
                 NAMED_CONSTRAINTS_ARRAY_ADD(pd_1)
                :"memory"
            );
        } else {
            /* Generic iteration: compute two lags (j, j+1).
             * xmm0 accumulates lag j, xmm1 lag j+1; results are written
             * through the "=m" output operands autoc[j] / autoc[j+1]. */
            __asm__ volatile(
                "movsd    "MANGLE(pd_1)", %%xmm0    \n\t"
                "movsd    "MANGLE(pd_1)", %%xmm1    \n\t"
                "1:                                 \n\t"
                "movapd   (%3,%0), %%xmm3           \n\t"
                "movupd -8(%4,%0), %%xmm4           \n\t"
                "mulpd     %%xmm3, %%xmm4           \n\t"
                "mulpd    (%4,%0), %%xmm3           \n\t"
                "addpd     %%xmm4, %%xmm1           \n\t"
                "addpd     %%xmm3, %%xmm0           \n\t"
                "add       $16,    %0               \n\t"
                "jl 1b                              \n\t"
                /* fold high lane into low lane before the scalar stores */
                "movhlps   %%xmm0, %%xmm3           \n\t"
                "movhlps   %%xmm1, %%xmm4           \n\t"
                "addsd     %%xmm3, %%xmm0           \n\t"
                "addsd     %%xmm4, %%xmm1           \n\t"
                "movsd     %%xmm0, %1               \n\t"
                "movsd     %%xmm1, %2               \n\t"
                :"+&r"(i), "=m"(autoc[j]), "=m"(autoc[j+1])
                :"r"(data+len), "r"(data+len-j)
                 NAMED_CONSTRAINTS_ARRAY_ADD(pd_1)
            );
        }
    }
}
103
104
#endif /* HAVE_SSE2_INLINE */
105
106
/**
 * Install x86-optimized LPC function pointers into *c, based on the
 * CPU features reported at runtime.
 */
av_cold void ff_lpc_init_x86(LPCContext *c)
{
    int flags = av_get_cpu_flags();

#if HAVE_SSE2_INLINE
    if (INLINE_SSE2_SLOW(flags)) {
        c->lpc_compute_autocorr = lpc_compute_autocorr_sse2;
    }
#endif

    if (EXTERNAL_SSE2(flags)) {
        c->lpc_apply_welch_window = ff_lpc_apply_welch_window_sse2;
    }

    /* Checked after SSE2 so the AVX2 version overrides it when available. */
    if (EXTERNAL_AVX2(flags)) {
        c->lpc_apply_welch_window = ff_lpc_apply_welch_window_avx2;
    }
}