Эта программа строит полное выборочное распределение выборочного среднего. Элементы генеральной совокупности принимают N различных значений. Из совокупности извлекаются выборки объёма n с возвращением. При увеличении N и n (особенно N) число комбинаций становится очень большим, и программа может исчерпать доступную оперативную память. Прошу совета о том, как повысить эффективность программы и улучшить пользовательский интерфейс (а также любые другие аспекты, на которые обратит внимание читатель). Эта программа не является чьей-либо собственностью: если кто-то может использовать её или создать улучшенную версию, пожалуйста, делайте это с указанием моего авторства.
#!/usr/bin/env python
##Imports first##
##These Python modules are imported to facilitate analysis.
##
import itertools #used to generate combinations
import math #used for some computations
import statistics as st #used for some computations
import operator # used to compute powers
import functools #used to write functions
import time #time counter, can be removed
import matplotlib.pyplot as plt #used for graphing
import numpy as np #used for graphing
####USER INPUT IS PROMPTED BY THE COMMANDS IN THIS SECTION.####
##INPUT: Create a list of values.
#A function is created to convert integers and fractions to a
#decimal (floating-point) representation
##Function begins
##
def convert_to_float(frac_str):
    """Convert a decimal, integer, or fraction string to a float.

    Accepted forms are a plain number ("2.3", "-1"), a simple fraction
    ("10/3") and a mixed fraction ("3 1/2", "-3 1/2").  Leading and
    trailing whitespace is ignored.

    Args:
        frac_str: The text to convert.

    Returns:
        The value as a float, or None when the text is not in one of the
        accepted forms.

    Raises:
        ZeroDivisionError: If the denominator of a fraction is zero.
    """
    text = frac_str.strip()
    try:
        return float(text)
    except ValueError:
        pass  # not a plain number; fall through to the fraction forms

    # A fraction has exactly one '/' separating numerator and denominator.
    try:
        numerator_part, denom = text.split('/')
    except ValueError:
        return None

    # The part before the '/' is either "num" or "leading num".
    pieces = numerator_part.split()
    try:
        if len(pieces) == 1:
            return float(pieces[0]) / float(denom)
        if len(pieces) == 2:
            leading, num = pieces
            whole = float(leading)
            fraction = float(num) / float(denom)
            # Take the sign from the text, not the value, so that a
            # leading "-0" still yields a negative result.
            sign_mult = -1 if leading.startswith('-') else 1
            return whole + sign_mult * fraction
    except ValueError:
        return None
    return None
##Function ends
##
##= = = = = = = = = = = = = = = = = = = = = = = = = = = =
# Explain the expected input format for the population values.
print("Enter the values of the N discrete elements in the population.")
print(" Enter these as either decimal, integer, or fractional values.")
print(" Mixed fractions are not allowed: 7/2, not 3 1/2.")
print(" The list should look like this: [-1,2.3,10/3,8].")
print(" >>>>> Do not put spaces between entered values.")
print("Suggestion: Create this input and the list of frequencies")
print(" below in a text file and cut-and-paste ")
print(" in order to reduce likelihood of entry error.")
print("")

# Read the bracketed, comma-separated list and convert each entry.
# The prompt's line break had degraded to a literal "n"; it is "\n" again.
valListInp = input("Enter a list of values, bracketed, like this [-1,2.3,10/3,8],\n with no spaces between entries: ")
valListStr = valListInp.strip('][').split(',')
valList = list(map(convert_to_float, valListStr))

# convert_to_float returns None for an entry it cannot interpret.  Stop now
# with a clear message rather than failing later in the arithmetic.
if None in valList:
    bad_entries = [text for text, value in zip(valListStr, valList) if value is None]
    raise SystemExit("Could not interpret these values: " + ", ".join(map(repr, bad_entries)))

# The length of valList equals the number of distinct elements.
N = len(valList)
##= = = = = = = = = = = = = = = = = = = = = = = = = = = =
##INPUT: Create a list of relative frequencies, with N entries.
# Explain the expected input format for the relative frequencies.
print(" ")
print("Now do the same for a list of relative frequencies.")
print("Each relative frequency in this list must be positive, and they must sum to 1.")
print("The calculated sum will appear below as a check that this sum is 1")
print(">>>>> Again, no spaces")
print("For example [0.2,0.3,2/5,.1].")
print("")

# Read the bracketed, comma-separated list and convert each entry.
# The prompt's line break had degraded to a literal "n"; it is "\n" again.
freqListInp = input("Enter a list of relative frequencies, bracketed like this [0.2,0.3,2/5,.1],\n with no spaces between entries: ")
freqListStr = freqListInp.strip('][').split(',')
freqList = list(map(convert_to_float, freqListStr))

# convert_to_float returns None for an entry it cannot interpret.
if None in freqList:
    bad_entries = [text for text, value in zip(freqListStr, freqList) if value is None]
    raise SystemExit("Could not interpret these relative frequencies: " + ", ".join(map(repr, bad_entries)))

# Every population value needs exactly one relative frequency; the later
# computations index both lists with the same position.
if len(freqList) != N:
    raise SystemExit("Expected %d relative frequencies (one per value) but received %d." % (N, len(freqList)))
##= = = = = = = = = = = = = = = = = = = = = = = = = = = =
##INPUT: Enter the sample size.
# Read the sample size.  int() raises ValueError on non-integer text.
n = int(input("Enter the sample size, stated as an integer: "))
# The analysis calls statistics.stdev on every sample, which needs at least
# two observations; reject smaller sizes before any heavy work starts.
if n < 2:
    raise SystemExit("The sample size must be an integer of at least 2.")
print("= = = = = = = = = = = = = = = = =")
"""
USER INPUT ENDS HERE.
Do not change anything below unless you intend to revise the program.
"""
##= = = = = = = = = = = = = = = = = = = = = = = = = = = =
# Echo the inputs back so the user can verify them before the analysis runs.
freq_total_text = format(sum(freqList), ".4f")
print("INPUT SUMMARY")
print(" The user-provided information follows:")
print(" The entered values are ", valList)
print(" Their relative frequencies are ", freqList)
print(" Check: The relative frequencies sum to", freq_total_text)
print(" The number of distinct elements is N =", N)
print(" The sample size is n = ", n)
for closing_line in ("= = = = = = = = = = = = = = = = =",
                     "",
                     "Output follows. You will be prompted for options."):
    print(closing_line)
# Population mean: the frequency-weighted sum of the values.
mu = 0
for idx, value in enumerate(valList):
    mu += value * freqList[idx]
print("")
print("The population mean is ", format(mu, ".4f"))

# Population variance: the frequency-weighted squared deviation from mu.
# The standard deviation is its square root.
variance = 0
for idx, value in enumerate(valList):
    variance += freqList[idx] * (value - mu) ** 2
sigma = variance ** 0.5
print("The population variance is ", format(variance, ".4f"), "and")
print(" the population standard deviation is ", format(sigma, ".4f"))
print(" ")
start_time = time.time()
# = = = = = = Analysis of the sample begins here. = = = = = =
# Determine the number of combinations with replacement of n items drawn
# from N distinct values: (N+n-1)! / ((N-1)! * n!).
# The quotient is always a whole number, so floor division is used to keep
# the arithmetic in exact integers.  True division would go through a float,
# which loses precision for large counts and can overflow.
C = math.factorial(N + n - 1) // (math.factorial(N - 1) * math.factorial(n))
print(C, "combinations will be generated.")
print(" ")
# Enumerate every combination (with replacement) of n population values.
# Each combination implies one value of xbar, stddev, etc.; several
# combinations may imply the same value.
combsList = list(itertools.combinations_with_replacement(valList, n))
print("")

# For each combination, record how many times each population value occurs.
# The result has one row per combination and one column per population value.
countList = [[sample.count(value) for value in valList] for sample in combsList]
# One sample mean per combination (values may repeat across combinations).
meansList = [st.mean(sample) for sample in combsList]

# One sample standard deviation per combination (values may repeat).
stdevList = [st.stdev(sample) for sample in combsList]

# One sample variance per combination (values may repeat).
varianceList = [st.variance(sample) for sample in combsList]
#The next list permsList is the number of permutations of each combination.
## For mechanical reasons, it is not in the yp list.
def f(i):
    """Return the number of distinct orderings of combination i.

    This is the multinomial coefficient n! / (c_1! * c_2! * ... * c_N!),
    where c_j is countList[i][j].  It reads the module-level names N, n
    and countList.

    Args:
        i: Row index into countList.

    Returns:
        The number of permutations, as a float.
    """
    # Build the denominator in a local variable; nothing is written to
    # module globals.
    denominator = 1
    for j in range(N):
        denominator *= math.factorial(countList[i][j])
    # Divide the denominator into the numerator, n!
    return math.factorial(n) / denominator
# Number of permutations for every combination, in combination order.
permsList = [f(row) for row in range(C)]
# Probabilities.  For each combination, raise every relative frequency to the
# number of times its value occurs in that combination (one row each).
raisedList = [[freqList[j] ** counts[j] for j in range(N)] for counts in countList]

# The product across a row is the probability of one particular ordering
# of that combination.
probList = [functools.reduce(operator.mul, powers, 1) for powers in raisedList]
# Probability of each combination: orderings times probability per ordering.
combProbList = [orderings * prob for orderings, prob in zip(permsList, probList)]

# The same products again, kept under a second name as a separate list.
stdevProbList = [orderings * prob for orderings, prob in zip(permsList, probList)]
### MEANS, STANDARD DEVIATIONS, AND THEIR PROBABILITIES ###
# Distinct sample means in ascending order, and how many there are.
means = sorted(set(meansList))
M = len(means)

# Distinct sample variances in ascending order.
variances = sorted(set(varianceList))

# Distinct sample standard deviations in ascending order, and their count.
stdevs = sorted(set(stdevList))
S = len(stdevs)
# Probability of each distinct sample mean.
# The combinations are grouped in a single pass with a dictionary instead of
# being rescanned once per distinct mean.  Each total is still accumulated in
# combination order, so the floating-point additions are unchanged.
mean_totals = dict.fromkeys(means, 0)
for sample_mean, comb_prob in zip(meansList, combProbList):
    mean_totals[sample_mean] += comb_prob
MprobSums = [mean_totals[value] for value in means]

# Cumulative probabilities for means (running sum in ascending order).
McumProbSums = list(itertools.accumulate(MprobSums))

# Probability of each distinct sample standard deviation, grouped the same way.
stdev_totals = dict.fromkeys(stdevs, 0)
for sample_stdev, comb_prob in zip(stdevList, combProbList):
    stdev_totals[sample_stdev] += comb_prob
SprobSums = [stdev_totals[value] for value in stdevs]

# Cumulative probabilities for standard deviations.
ScumProbSums = list(itertools.accumulate(SprobSums))
# Variance: just the probabilities, not the cumulative probabilities.
# The list is sized by the distinct variances themselves rather than by S
# (the count of distinct standard deviations).  The two counts can differ
# when close variances round to the same square root, and using S would
# then drop the largest variances from the expectation below.
variance_totals = dict.fromkeys(variances, 0)
for sample_variance, comb_prob in zip(varianceList, combProbList):
    variance_totals[sample_variance] += comb_prob
VprobSums = [variance_totals[value] for value in variances]

# Terms of the expected sample mean: xbar * P(xbar).
xbar_freqList = [xbar * prob for xbar, prob in zip(means, MprobSums)]

# Terms of the expected sample variance: v * P(v), over every distinct variance.
V_freqList = [var * prob for var, prob in zip(variances, VprobSums)]
print("--- %s seconds ---" % (time.time() - start_time))

# Ask how many combinations to list, then print a legend and the rows.
L = int(input("Enter the number of combinations to view: "))
if L > 0:
    print("The list shows the following for each of the first", L, "combinations: ")
    print(" the run number, i (Run),")
    print(" the implied mean the i-th combination (Mean), ")
    print(" the probability of this combination (Prob), ")
    print(" the number of permutations of the elements in this combination (Perms), and")
    print(" the elements of this combination (Combination).")
print("")
if L > 0:
    print("Run", " Mean", " Prob", " Perms", " Combination")
# Never index past the last combination: a request for more rows than exist
# simply lists them all.
for i in range(min(L, len(combsList))):
    print(i + 1, " ", "%.4f" % meansList[i], "%.8f" % probList[i],
          " %.0f" % permsList[i], " ", combsList[i])
print("")
# Report how many distinct statistics were produced.
print(M, "sample means and", S, "sample standard deviations ")
print(" are generated and can be printed.")
print(" ")

# Expected values of the sample mean and of the sample variance.
expected_xbar_text = format(sum(xbar_freqList), ".4f")
expected_variance_text = format(sum(V_freqList), ".4f")
print("Average values (means) of sample means and standard deviations:")
print(" The average sample mean value is approximately", expected_xbar_text)
print(" The average sample variance value is approximately", expected_variance_text)

# Floating-point caveat, then where the output files are written.
# See https://docs.python.org/3/tutorial/floatingpoint.html
for note_line in (" These are approximations due to rounding in the calculations of the",
                  " individual values of the sample mean and variance.",
                  " ",
                  "Output is saved to the folder that contains this program.",
                  "These text files are named 'xbarout.txt' and 'stdevout.txt'. "):
    print(note_line)
# Optionally show the sampling distribution of the mean on screen.
# NOTE(review): the source lost its indentation; the " = = = " footer is
# assumed to belong to the table (inside the `if`) -- confirm.
printmeans = input("Do you want to print means (y/n)? ")
if printmeans in ("y", "Y"):
    print("Sample Means")
    print(" xbar", "prob", "cumprob")
    for xbar, prob, cum_prob in zip(means, MprobSums, McumProbSums):
        print(format(xbar, ".3f"), format(prob, ".4f"), format(cum_prob, ".4f"))
    print(" = = = ")

# Optionally show the sampling distribution of the standard deviation.
printstdevs = input("Do you want to print standard deviations (y/n)? ")
if printstdevs in ("y", "Y"):
    print("Sample Standard Deviations")
    print(" stdev", "prob", "cumprob")
    for stdev, prob, cum_prob in zip(stdevs, SprobSums, ScumProbSums):
        print(format(stdev, ".3f"), format(prob, ".4f"), format(cum_prob, ".4f"))
    print(" = = = ")
##print("Sample Means")
import sys  # retained from the original script; no longer needed for redirection

# Write the sampling distribution of the mean to 'xbarout.txt'.
# Each file is opened in a `with` block and written through print(file=...),
# so it is closed even if a row fails to format.  sys.stdout is never
# redirected, and the name `f` (the permutation-count function) is no longer
# rebound to a file object.
with open('xbarout.txt', 'w') as means_file:
    print("xbar", "prob", "cumprob", file=means_file)
    for i in range(M):
        print("{0:.3f}".format(means[i]), "{0:.4f}".format(MprobSums[i]),
              "{0:.4f}".format(McumProbSums[i]), file=means_file)

# Write the sampling distribution of the standard deviation to 'stdevout.txt'.
with open('stdevout.txt', 'w') as stdev_file:
    print(" stdev", "prob", "cumprob", file=stdev_file)
    for i in range(S):
        print("{0:.3f}".format(stdevs[i]), "{0:.4f}".format(SprobSums[i]),
              "{0:.4f}".format(ScumProbSums[i]), file=stdev_file)

# Blank spacer on screen; the newline escape had degraded to a literal "n".
print("", "\n")
##= = = = = = = = = = = = = = = = = = = = = = = = = = = =
# Plots
def _show_bar_chart(x_values, heights, title, x_label, y_label, **bar_options):
    """Draw one bar chart with the given labels and display it."""
    plt.bar(x_values, heights, **bar_options)
    plt.title(title)
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.show()


print("= = = = = = Plots Follow = = = = = = ")

# Population distribution.
_show_bar_chart(valList, freqList,
                "Population Relative Frequencies", "x", "Relative Frequency")

# Sampling distribution of the mean.  Bars narrow as the number of distinct
# means grows; the 0.8 exponent can be changed.
mean_bar_width = 1 / M ** 0.8
_show_bar_chart(means, MprobSums,
                "Sample Mean (xbar) Probabilities, n = {}".format(n),
                "xbar", "prob",
                width=mean_bar_width, align='center')

# Sampling distribution of the standard deviation; same width rule.
stdev_bar_width = 1 / S ** 0.8
_show_bar_chart(stdevs, SprobSums,
                "Sample Standard Deviation (s) Probabilities, n = {}".format(n),
                "s", "prob",
                width=stdev_bar_width)

# Closing summary and total elapsed time.
print("For this population with", N,
      "distinct elements and a sample size of", n, ":")
print(" ", C, "combinations,", M, " means, ", S, "standard deviations result.")
elapsed_seconds = time.time() - start_time
print("--- {} seconds ---".format(elapsed_seconds))
```