使用python编写一个函数，能够根据均值、标准差、偏度和峰度生成n个随机数

问题遇到的现象和发生背景

使用python编写一个函数，能够根据均值、标准差、偏度和峰度生成n个随机数，这n个随机数计算的均值、标准差、偏度和峰度误差和期望的在一定范围之内

import numpy as np
from scipy import stats
# 任务，写一个函数，能够根据均值mu,标准差sigma,偏度skew,峰度kurtosis产生随机的数列，数列计算出来的均值、标准差、偏度、峰度误差在一定程度之内
def generate_data(mu,sigma,skew,kurtosis,size=10000):
    pass 
x = generate_data(mu,sigma,skew,kurtosis,size=10000)
cal_mu = np.mean(x)
cal_sigma = np.std(x)
cal_skew = stats.skew(x)
cal_kurtosis = stats.kurtosis(x)
assert abs(cal_mu - mu)<0.001
assert abs(cal_sigma - sigma)<0.001
assert abs(cal_skew - skew)<0.001
assert abs(cal_kurtosis - kurtosis)<0.001

暴力破解


import numpy as np
from scipy import stats


# 任务，写一个函数，能够根据均值mu,标准差sigma,偏度skew,峰度kurtosis产生随机的数列，数列计算出来的均值、标准差、偏度、峰度误差在一定程度之内
def generate_data(mu, sigma, skew, kurtosis, size=10000):
    while True:
        try:
            x = np.random.normal(mu, sigma, size=size)
            cal_mu = np.mean(x, axis=0)
            cal_sigma = np.std(x, axis=0)
            cal_skew = stats.skew(x)
            cal_kurtosis = stats.kurtosis(x)
            # print(mu, sigma, skew, kurtosis)
            assert abs(cal_mu - mu) < 0.001
            print(cal_mu)
            assert abs(cal_sigma - sigma) < 0.001
            print(cal_sigma)
            assert abs(cal_skew - skew) < 0.001
            print(cal_skew)
            assert abs(cal_kurtosis - kurtosis) < 0.001
            print(cal_kurtosis)
            return x
        except:
            pass


mu, sigma, skew, kurtosis=0.6,0.5,0.003,0.04
x = generate_data(mu, sigma, skew, kurtosis, size=10000)
cal_mu = np.mean(x, axis=0)
cal_sigma = np.std(x, axis=0)
cal_skew = stats.skew(x)
cal_kurtosis = stats.kurtosis(x)
assert abs(cal_mu - mu) < 0.001
assert abs(cal_sigma - sigma) < 0.001
assert abs(cal_skew - skew) < 0.001
assert abs(cal_kurtosis - kurtosis) < 0.001
print(x)

可以参考一下做法：

我首先生成PDF曲线以供参考：

NUM_SAMPLES = 100000

SKEW_PARAMS = [-3, 0]

def skew_norm_pdf(x,e=0,w=1,a=0):

adapated from:

https://stackoverflow.com/questions/5884768/skew-normal-distribution-in-scipy

t = (x-e) / w

return 2.0 * w * stats.norm.pdf(t) * stats.norm.cdf(a*t)

generate the skew normal PDF for reference:

location = 0.0

scale = 1.0

x = np.linspace(-5,5,100)

plt.subplots(figsize=(12,4))

for alpha_skew in SKEW_PARAMS:

p = skew_norm_pdf(x,location,scale,alpha_skew)

n.b. note that alpha is a parameter that controls skew, but the 'skewness'

as measured will be different. see the wikipedia page:

https://en.wikipedia.org/wiki/Skew_normal_distribution

plt.plot(x,p)

OOmUd.png

接下来,我发现了一个从偏斜正态分布中采样随机数的VB实现,并将其转换为python：

literal adaption from:

https://stackoverflow.com/questions/4643285/how-to-generate-random-numbers-that-follow-skew-normal-distribution-in-matlab

original at:

http://www.ozgrid.com/forum/showthread.php?t=108175

def rand_skew_norm(fAlpha, fLocation, fScale):

sigma = fAlpha / np.sqrt(1.0 + fAlpha**2)

afRN = np.random.randn(2)

u0 = afRN[0]

v = afRN[1]

u1 = sigma*u0 + np.sqrt(1.0 -sigma**2) * v

if u0 >= 0:

return u1*fScale + fLocation

return (-u1)*fScale + fLocation

def randn_skew(N, skew=0.0):

return [rand_skew_norm(skew, 0, 1) for x in range(N)]

lets check they at least visually match the PDF:

plt.subplots(figsize=(12,4))

for alpha_skew in SKEW_PARAMS:

p = randn_skew(NUM_SAMPLES, alpha_skew)

sns.distplot(p)

QzBFV.png

然后写了一个快速版本(没有经过大量测试)似乎是正确的：

def randn_skew_fast(N, alpha=0.0, loc=0.0, scale=1.0):

sigma = alpha / np.sqrt(1.0 + alpha**2)

u0 = np.random.randn(N)

v = np.random.randn(N)

u1 = (sigma*u0 + np.sqrt(1.0 - sigma**2)*v) * scale

u1[u0 < 0] *= -1

u1 = u1 + loc

return u1

lets check again

plt.subplots(figsize=(12,4))

for alpha_skew in SKEW_PARAMS:

p = randn_skew_fast(NUM_SAMPLES, alpha_skew)

sns.distplot(p)

# -*- coding: utf-8 -*-
# @Time     : 2022-10-14 14:03
# @Author   : hyh
# @File     : test1014.py
# @Software : PyCharm


import numpy as np
from scipy import stats
import math
import matplotlib as mpl
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm


def calc_statistics(x):
    n = x.shape[0]  # 样本个数

    # 手动计算
    # 分别表示各个k阶矩
    m = 0
    m2 = 0
    m3 = 0
    m4 = 0
    for t in x:
        m += t
        m2 += t * t
        m3 += t ** 3
        m4 += t ** 4
    m /= n
    m2 /= n
    m3 /= n
    m4 /= n
    # 代入公式求个值
    mu = m
    sigma = np.sqrt(m2 - mu * mu)
    skew = (m3 - 3 * mu * m2 + 2 * mu ** 3) / sigma ** 3
    kurtosis = (m4 - 4 * mu * m3 + 6 * mu * mu * m2 - 4 * mu ** 3 * mu + mu ** 4) / sigma ** 4 - 3
    print('手动计算均值、标准差、偏度、峰度：', mu, sigma, skew, kurtosis)

    # 使用系统函数验证
    mu = np.mean(x, axis=0)
    sigma = np.std(x, axis=0)
    skew = stats.skew(x)
    kurtosis = stats.kurtosis(x)
    return mu, sigma, skew, kurtosis


if __name__ == '__main__':
    d = np.random.randn(100000)
    print(d)
    mu, sigma, skew, kurtosis = calc_statistics(d)
    print('函数库计算均值、标准差、偏度、峰度：', mu, sigma, skew, kurtosis)

我来教你如何使用