2022年 11月 7日

Python 求相关系数

给定两组等长的序列数据,求两者的线性相关系数(即 Pearson 相关系数)。

1:使用numpy

import random

import numpy as np

# Two random integer series, 20 samples each, every value drawn from [0, 10].
a = [random.randint(0, 10) for _ in range(20)]
b = [random.randint(0, 10) for _ in range(20)]

# Stack the two series into a single 2 x 20 array, one row per series.
ab = np.array((a, b))

# Covariance matrix of the two rows (numpy treats each row as a variable).
print(np.cov(ab))

# Correlation coefficient matrix; the off-diagonal entries are corr(a, b).
print(np.corrcoef(ab))

2:使用pandas

import pandas as pd

# Compute the covariance and the correlation coefficient with pandas.
# A DataFrame is used as the container; `ab` is transposed so that each
# series becomes one column (A and B) instead of one row.
# NOTE: the column labels must use plain ASCII quotes; typographic quotes
# are a SyntaxError in Python.
dfab = pd.DataFrame(ab.T, columns=['A', 'B'])

# Covariance between columns A and B.
print(dfab.A.cov(dfab.B))

# Correlation coefficient between columns A and B.
print(dfab.A.corr(dfab.B))

3:使用原生函数

import random

import math

# Fresh sample data for the pure-Python implementation below:
# two lists of 20 random integers, each value drawn from [0, 10].
a = [random.randint(0, 10) for _ in range(20)]
b = [random.randint(0, 10) for _ in range(20)]

def mean(x):
    """Return the arithmetic mean of the values in *x*.

    Raises ZeroDivisionError when *x* is empty, because the sum is divided
    by ``len(x)``.
    """
    return sum(x) / len(x)

def de_mean(x):
    """Return a list with the deviation of every item of *x* from the mean.

    The result has the same length and order as *x*.
    """
    x_bar = mean(x)
    # Plain ASCII minus: a typographic dash here is a SyntaxError.
    return [x_i - x_bar for x_i in x]

# Helper functions: dot product and sum of squares.
def dot(v, w):
    """Return the dot product of *v* and *w*.

    Elements are paired with ``zip``, so if the inputs differ in length the
    extra elements of the longer one are ignored.
    """
    return sum(v_i * w_i for v_i, w_i in zip(v, w))

def sum_of_squares(v):
    """Return the sum of the squared elements of *v* (``dot(v, v)``)."""
    return dot(v, v)

def variance(x):
    """Return the sample variance of *x*.

    The sum of squared deviations from the mean is divided by ``n - 1``.
    Raises ZeroDivisionError when *x* has fewer than two elements.
    """
    n = len(x)
    deviations = de_mean(x)
    # Plain ASCII minus: a typographic dash here is a SyntaxError.
    return sum_of_squares(deviations) / (n - 1)

def standard_deviation(x):
    """Return the sample standard deviation of *x*.

    This is the square root of ``variance(x)``.
    """
    return math.sqrt(variance(x))

def covariance(x, y):
    """Return the sample covariance of *x* and *y*.

    The dot product of the two deviation vectors is divided by ``n - 1``,
    where ``n`` is ``len(x)``.
    Raises ZeroDivisionError when *x* has fewer than two elements.
    """
    n = len(x)
    return dot(de_mean(x), de_mean(y)) / (n - 1)

def correlation(x, y):
    """Return the correlation coefficient of *x* and *y*.

    The covariance is divided by both sample standard deviations.
    When either series has zero standard deviation (it is constant) the
    quotient is undefined, and 0 is returned instead.
    """
    stdev_x = standard_deviation(x)
    stdev_y = standard_deviation(y)
    if stdev_x > 0 and stdev_y > 0:
        return covariance(x, y) / stdev_x / stdev_y
    else:
        # No variation in at least one series: report "no correlation".
        return 0

# Show both input series first, then each standard deviation, and finally
# the correlation coefficient between the two series.
for series in (a, b):
    print(series)

for series in (a, b):
    print(standard_deviation(series))

print(correlation(a, b))

4:使用 R、SPSS、Excel