使用python编写一个函数,能够根据均值、标准差、偏度和峰度生成n个随机数,这n个随机数计算的均值、标准差、偏度和峰度误差和期望的在一定范围之内
import numpy as np
from scipy import stats
# 任务,写一个函数,能够根据均值mu,标准差sigma,偏度skew,峰度kurtosis产生随机的数列,数列计算出来的均值、标准差、偏度、峰度误差在一定程度之内
def generate_data(mu,sigma,skew,kurtosis,size=10000):
pass
x = generate_data(mu,sigma,skew,kurtosis,size=10000)
cal_mu = np.mean(x)
cal_sigma = np.std(x)
cal_skew = stats.skew(x)
cal_kurtosis = stats.kurtosis(x)
assert abs(cal_mu - mu)<0.001
assert abs(cal_sigma - sigma)<0.001
assert abs(cal_skew - skew)<0.001
assert abs(cal_kurtosis - kurtosis)<0.001
暴力破解
import numpy as np
from scipy import stats
# 任务,写一个函数,能够根据均值mu,标准差sigma,偏度skew,峰度kurtosis产生随机的数列,数列计算出来的均值、标准差、偏度、峰度误差在一定程度之内
def generate_data(mu, sigma, skew, kurtosis, size=10000):
while True:
try:
x = np.random.normal(mu, sigma, size=size)
cal_mu = np.mean(x, axis=0)
cal_sigma = np.std(x, axis=0)
cal_skew = stats.skew(x)
cal_kurtosis = stats.kurtosis(x)
# print(mu, sigma, skew, kurtosis)
assert abs(cal_mu - mu) < 0.001
print(cal_mu)
assert abs(cal_sigma - sigma) < 0.001
print(cal_sigma)
assert abs(cal_skew - skew) < 0.001
print(cal_skew)
assert abs(cal_kurtosis - kurtosis) < 0.001
print(cal_kurtosis)
return x
except:
pass
mu, sigma, skew, kurtosis=0.6,0.5,0.003,0.04
x = generate_data(mu, sigma, skew, kurtosis, size=10000)
cal_mu = np.mean(x, axis=0)
cal_sigma = np.std(x, axis=0)
cal_skew = stats.skew(x)
cal_kurtosis = stats.kurtosis(x)
assert abs(cal_mu - mu) < 0.001
assert abs(cal_sigma - sigma) < 0.001
assert abs(cal_skew - skew) < 0.001
assert abs(cal_kurtosis - kurtosis) < 0.001
print(x)
可以参考一下做法:
我首先生成PDF曲线以供参考:
NUM_SAMPLES = 100000
SKEW_PARAMS = [-3, 0]
def skew_norm_pdf(x,e=0,w=1,a=0):
t = (x-e) / w
return 2.0 * w * stats.norm.pdf(t) * stats.norm.cdf(a*t)
location = 0.0
scale = 1.0
x = np.linspace(-5,5,100)
plt.subplots(figsize=(12,4))
for alpha_skew in SKEW_PARAMS:
p = skew_norm_pdf(x,location,scale,alpha_skew)
plt.plot(x,p)
OOmUd.png
接下来,我发现了一个从偏斜正态分布中采样随机数的VB实现,并将其转换为python:
def rand_skew_norm(fAlpha, fLocation, fScale):
sigma = fAlpha / np.sqrt(1.0 + fAlpha**2)
afRN = np.random.randn(2)
u0 = afRN[0]
v = afRN[1]
u1 = sigma*u0 + np.sqrt(1.0 -sigma**2) * v
if u0 >= 0:
return u1*fScale + fLocation
return (-u1)*fScale + fLocation
def randn_skew(N, skew=0.0):
return [rand_skew_norm(skew, 0, 1) for x in range(N)]
plt.subplots(figsize=(12,4))
for alpha_skew in SKEW_PARAMS:
p = randn_skew(NUM_SAMPLES, alpha_skew)
sns.distplot(p)
QzBFV.png
然后写了一个快速版本(没有经过大量测试)似乎是正确的:
def randn_skew_fast(N, alpha=0.0, loc=0.0, scale=1.0):
sigma = alpha / np.sqrt(1.0 + alpha**2)
u0 = np.random.randn(N)
v = np.random.randn(N)
u1 = (sigma*u0 + np.sqrt(1.0 - sigma**2)*v) * scale
u1[u0 < 0] *= -1
u1 = u1 + loc
return u1
plt.subplots(figsize=(12,4))
for alpha_skew in SKEW_PARAMS:
p = randn_skew_fast(NUM_SAMPLES, alpha_skew)
sns.distplot(p)
1
# -*- coding: utf-8 -*-
# @Time : 2022-10-14 14:03
# @Author : hyh
# @File : test1014.py
# @Software : PyCharm
import numpy as np
from scipy import stats
import math
import matplotlib as mpl
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
def calc_statistics(x):
n = x.shape[0] # 样本个数
# 手动计算
# 分别表示各个k阶矩
m = 0
m2 = 0
m3 = 0
m4 = 0
for t in x:
m += t
m2 += t * t
m3 += t ** 3
m4 += t ** 4
m /= n
m2 /= n
m3 /= n
m4 /= n
# 代入公式求个值
mu = m
sigma = np.sqrt(m2 - mu * mu)
skew = (m3 - 3 * mu * m2 + 2 * mu ** 3) / sigma ** 3
kurtosis = (m4 - 4 * mu * m3 + 6 * mu * mu * m2 - 4 * mu ** 3 * mu + mu ** 4) / sigma ** 4 - 3
print('手动计算均值、标准差、偏度、峰度:', mu, sigma, skew, kurtosis)
# 使用系统函数验证
mu = np.mean(x, axis=0)
sigma = np.std(x, axis=0)
skew = stats.skew(x)
kurtosis = stats.kurtosis(x)
return mu, sigma, skew, kurtosis
if __name__ == '__main__':
d = np.random.randn(100000)
print(d)
mu, sigma, skew, kurtosis = calc_statistics(d)
print('函数库计算均值、标准差、偏度、峰度:', mu, sigma, skew, kurtosis)
我来教你如何使用