Skip to content

Speech Coding

Companding (A-law, mu-law) and telephone-band filtering.

v_lin2pcma

V_LIN2PCMA - Convert linear PCM to A-law.

v_lin2pcma

v_lin2pcma(x, m=85, s=2017.396342) -> ndarray

Convert linear signal to A-law PCM values.

Parameters:

Name Type Description Default
x array_like

Input signal values.

required
m int

XOR mask value applied to output. Default is 85.

85
s float

Scale factor applied to input values. Default is 2017.396342 (ITU G.711 standard: sqrt((1120^2 + 2624^2)/2)).

Common scale factors: s=1 gives an input range of ±4096; s=2017.40 gives an input range of ±2.03033976 (default, 0 dBm0); s=4096 gives an input range of ±1.

2017.396342

Returns:

Name Type Description
p ndarray

A-law PCM values in the range 0 to 255.

Source code in pyvoicebox/v_lin2pcma.py
def v_lin2pcma(x, m=85, s=2017.396342) -> np.ndarray:
    """Encode linear samples as A-law PCM codes.

    Parameters
    ----------
    x : array_like
        Linear input samples.
    m : int, optional
        Mask XOR-ed onto every code word before it is returned. A falsy
        value (0 or None) skips the XOR step. Default is 85.
    s : float, optional
        Multiplier applied to ``x`` before quantisation. Default is
        2017.396342 (ITU G.711 standard: sqrt((1120^2 + 2624^2)/2)).

        Common scale factors:
            s=1       : input range +-4096
            s=2017.40 : input range +-2.03033976 (default, 0 dBm0)
            s=4096    : input range +-1

    Returns
    -------
    p : ndarray
        A-law code words (0 to 255 for the default mask), stored as
        float64.
    """
    # Work in units of 64 so that full scale corresponds to +-64.
    scaled = np.asarray(x, dtype=float) * (s / 64.0)

    # Saturate at +-63 via the identity clip(v, -c, c) = (|v+c| - |v-c|) / 2.
    # The arithmetic form is kept deliberately: its rounding decides which
    # code a sample sitting right on a segment boundary receives.
    limited = (np.abs(scaled + 63) - np.abs(scaled - 63)) / 2.0

    # Sign bit: 1 for non-negative samples, 0 for negative ones.
    sign_bit = np.floor((limited + 64) / 64)

    # |limited| = mant * 2**expo with mant in [0.5, 1) (mant = expo = 0 for 0).
    mant, expo = np.frexp(np.abs(limited))

    # Only non-negative exponents select a segment; smaller magnitudes all
    # share segment 0 and are quantised uniformly.
    segment = np.maximum(expo, 0)

    # mant * 2**(expo - segment + 5): 16..31 once segment > 0, 0..31 otherwise.
    step = np.floor(np.ldexp(mant, expo - segment + 5))

    code = (128 * sign_bit + 16 * segment + step).astype(int)
    if m:
        code = np.bitwise_xor(code, m)
    return code.astype(np.float64)

v_pcma2lin

V_PCMA2LIN - Convert A-law PCM to linear.

v_pcma2lin

v_pcma2lin(p, m=85, s=None) -> ndarray

Convert A-law PCM values to linear signal.

Parameters:

Name Type Description Default
p array_like

A-law PCM values in the range 0 to 255.

required
m int

XOR mask applied to input values. Default is 85.

85
s float

Scale factor for output division. Default follows ITU G.711 (equivalent to s = 2017.396342).

Common scale factors: s=1 gives an output range of ±4032; s=2017.40 gives an output range of ±1.998616 (default, 0 dBm0); s=4032 gives an output range of ±1; s=4096 gives an output range of ±0.984375.

None

Returns:

Name Type Description
x ndarray

Linear signal values.

Source code in pyvoicebox/v_pcma2lin.py
def v_pcma2lin(p, m=85, s=None) -> np.ndarray:
    """Decode A-law PCM codes into linear samples.

    Parameters
    ----------
    p : array_like
        A-law code words in the range 0 to 255.
    m : int, optional
        Mask XOR-ed onto the code words before decoding. A falsy value
        (0 or None) skips the XOR step. Default is 85.
    s : float, optional
        Divisor applied to the decoded values. When omitted, the ITU G.711
        scaling is used (equivalent to s = 2017.396342).

        Common scale factors:
            s=1       : output range +-4032
            s=2017.40 : output range +-1.998616 (default, 0 dBm0)
            s=4032    : output range +-1
            s=4096    : output range +-0.984375

    Returns
    -------
    x : ndarray
        Decoded linear samples.
    """
    codes = np.asarray(p, dtype=float)

    # Multiplying by the reciprocal replaces the division by ``s``.
    scale = 4.95688418e-4 if s is None else 1.0 / s

    if m:
        raw = np.bitwise_xor(codes.astype(int), m).astype(float)
    else:
        raw = codes.copy()

    # Split each code word into its fields: 4-bit step, sign bit, segment.
    step = np.mod(raw, 16)
    sign_bit = np.floor(raw / 128)
    segment = (raw - step - 128 * sign_bit) / 16

    # Evaluates to 1 for segment 0 and to 0 for segments 1..7; it switches
    # on the correction that makes segment 0 decode to 2*step + 1.
    in_first = (np.abs(segment - 1) - segment + 1) / 2.0

    # (step + 16.5) * 2**segment, plus the segment-0 correction.
    magnitude = np.ldexp(step + 16.5, segment.astype(int)) + in_first * (step - 15.5)

    # The sign bit maps 1 -> +1 and 0 -> -1.
    return (2 * sign_bit - 1) * magnitude * scale

v_lin2pcmu

V_LIN2PCMU - Convert linear to Mu-law PCM.

Attempt at a faithful port of voicebox/v_lin2pcmu.m

v_lin2pcmu

v_lin2pcmu(x, s=4004.189931) -> ndarray

Convert linear signal to Mu-law PCM values.

Parameters:

Name Type Description Default
x array_like

Input signal values.

required
s float

Scale factor applied to input values. Default is 4004.189931 (ITU G.711 standard: sqrt((2207^2 + 5215^2)/2)).

Common scale factors: s=1 gives an input range of ±8159; s=4004.19 gives an input range of ±2.03761563 (default, 0 dBm0); s=8159 gives an input range of ±1.

4004.189931

Returns:

Name Type Description
p ndarray

Mu-law PCM values in the range 0 to 255.

Source code in pyvoicebox/v_lin2pcmu.py
def v_lin2pcmu(x, s=4004.189931) -> np.ndarray:
    """Encode linear samples as Mu-law PCM codes.

    Parameters
    ----------
    x : array_like
        Linear input samples.
    s : float, optional
        Multiplier applied to ``x`` before quantisation. Default is
        4004.189931 (ITU G.711 standard: sqrt((2207^2 + 5215^2)/2)).

        Common scale factors:
            s=1       : input range +-8159
            s=4004.19 : input range +-2.03761563 (default, 0 dBm0)
            s=8159    : input range +-1

    Returns
    -------
    p : ndarray
        Mu-law code words in the range 0 to 255, stored as float64.
    """
    scaled = np.asarray(x, dtype=float) * s

    # Saturate at +-8031 via the identity clip(v, -c, c) = (|v+c| - |v-c|) / 2.
    limited = (np.abs(scaled + 8031) - np.abs(scaled - 8031)) / 2.0

    # Sign bit: 1 for non-negative samples, 0 for negative ones.
    sign_bit = np.floor((limited + 8032) / 8032)

    # Biased magnitude = mant * 2**expo with mant in [0.5, 1). The bias of 33
    # keeps the magnitude at 33 or more, so expo is never below 6.
    mant, expo = np.frexp(np.abs(limited) + 33)

    # With expo >= 6 this equals 16 * expo - 48, i.e. 16 per segment.
    segment_term = 8 * (expo + np.abs(expo - 6))
    # Position inside the segment, 0..15.
    step = np.floor(32 * mant - 16)

    # Larger magnitudes yield smaller codes: segment and step are subtracted.
    code = 175 + 128 * sign_bit - segment_term - step
    return code.astype(np.float64)

v_pcmu2lin

V_PCMU2LIN - Convert Mu-law PCM to linear.

v_pcmu2lin

v_pcmu2lin(p, s=None) -> ndarray

Convert Mu-law PCM values to linear signal.

Parameters:

Name Type Description Default
p array_like

Mu-law PCM values in the range 0 to 255.

required
s float

Scale factor for output division. Default follows ITU G.711 (equivalent to s = 4004.189931).

Common scale factors: s=1 gives an output range of ±8031; s=4004.19 gives an output range of ±2.005649 (default, 0 dBm0); s=8031 gives an output range of ±1; s=8159 gives an output range of ±0.9843118.

None

Returns:

Name Type Description
x ndarray

Linear signal values.

Source code in pyvoicebox/v_pcmu2lin.py
def v_pcmu2lin(p, s=None) -> np.ndarray:
    """Decode Mu-law PCM codes into linear samples.

    Parameters
    ----------
    p : array_like
        Mu-law code words in the range 0 to 255.
    s : float, optional
        Divisor applied to the decoded values. When omitted, the ITU G.711
        scaling is used (equivalent to s = 4004.189931).

        Common scale factors:
            s=1       : output range +-8031
            s=4004.19 : output range +-2.005649 (default, 0 dBm0)
            s=8031    : output range +-1
            s=8159    : output range +-0.9843118

    Returns
    -------
    x : ndarray
        Decoded linear samples.
    """
    codes = np.asarray(p, dtype=float)

    # The factor 4 compensates for the (sign_bit - 0.5) = +-0.5 sign term and
    # for the magnitude being built in half-units (16.5 rather than 33).
    scale = 9.98953613e-4 if s is None else 4.0 / s

    # The low nibble is stored inverted, so undo that to get the step 0..15.
    step = 15 - np.mod(codes, 16)
    sign_bit = np.floor(codes / 128)
    # The segment field is stored inverted as well; this recovers 0..7.
    segment = (127 - codes - step + 128 * sign_bit) / 16

    # (step + 16.5) * 2**segment, with the encoder bias removed.
    magnitude = np.ldexp(step + 16.5, segment.astype(int)) - 16.5

    return (sign_bit - 0.5) * magnitude * scale

v_potsband

V_POTSBAND - Design 300-3400 Hz telephone bandwidth filter.

v_potsband

v_potsband(fs) -> tuple[ndarray, ndarray]

Design filter for 300-3400 Hz telephone bandwidth.

Parameters:

Name Type Description Default
fs float

Sample frequency in Hz.

required

Returns:

Name Type Description
b ndarray

Numerator coefficients.

a ndarray

Denominator coefficients.

Source code in pyvoicebox/v_potsband.py
def v_potsband(fs) -> tuple[np.ndarray, np.ndarray]:
    """Return the coefficients of a 300-3400 Hz telephone-band filter.

    The filter is the cascade of a third-order high-pass section with its
    edge at 300 Hz and a third-order low-pass section with its edge at
    3400 Hz. Both sections are derived from the same s-plane prototype.

    Parameters
    ----------
    fs : float
        Sample frequency in Hz.

    Returns
    -------
    b : ndarray
        Numerator coefficients (7 values).
    a : ndarray
        Denominator coefficients (7 values).
    """
    # s-plane zeros and poles of 3rd order Chebyshev type 2 highpass filter
    proto_zeros = np.array([0, 0.19892796195357j, -0.19892796195357j])
    proto_poles = np.array([-0.97247143137874,
                            -0.48623571568937 + 0.86535995266875j,
                            -0.48623571568937 - 0.86535995266875j])

    # --- High-pass section: bilinear transform pre-warped to 300 Hz ---
    warp_low = np.tan(300 * np.pi / fs)
    hp_zeros = 2.0 / (1 - proto_zeros * warp_low) - 1
    hp_poles = 2.0 / (1 - proto_poles * warp_low) - 1
    hp_den = np.real(np.poly(hp_poles))
    hp_num = np.real(np.poly(hp_zeros))
    # Alternating signs evaluate both polynomials at z = -1 (up to a common
    # sign), so this sets the high-pass gain at Nyquist to one.
    alternating = np.array([1, -1, 1, -1])
    hp_num = hp_num * (hp_den @ alternating) / (hp_num @ alternating)

    # --- Low-pass section: bilinear transform pre-warped to 3400 Hz ---
    warp_high = np.tan(3400 * np.pi / fs)
    lp_zeros = 2.0 / (proto_zeros / warp_high - 1) + 1
    lp_poles = 2.0 / (proto_poles / warp_high - 1) + 1
    lp_den = np.real(np.poly(lp_poles))
    lp_num = np.real(np.poly(lp_zeros))
    # Coefficient sums evaluate the polynomials at z = 1, so this sets the
    # low-pass gain at DC to one.
    lp_num = lp_num * np.sum(lp_den) / np.sum(lp_num)

    # Cascading the two sections multiplies their polynomials.
    return np.convolve(lp_num, hp_num), np.convolve(lp_den, hp_den)