Выборочное распределение выборочного среднего

Эта программа строит полное выборочное распределение выборочного среднего. Элементы генеральной совокупности принимают N различных значений. Из неё извлекаются выборки объёма n с возвращением. С ростом N и n (особенно N) число комбинаций становится очень большим, и программе может не хватить доступной оперативной памяти. Прошу совета о том, как повысить эффективность программы, а также улучшить пользовательский интерфейс (и любые другие аспекты, которые заметит читатель). Эта программа не является проприетарной: если кто-то может использовать её или создать лучшую версию, пожалуйста, делайте это с указанием моего авторства исходной программы.

#!/usr/bin/env python
##Imports first##
##These Python modules are imported to facilitate analysis.
##
import itertools                #used to generate combinations
import math                     #used for some computations
import statistics as st         #used for some computations
import operator                 # used to compute powers
import functools                #used to write functions
import time                     #time counter, can be removed
import matplotlib.pyplot as plt #used for graphing
import numpy as np              #used for graphing


####USER INPUT IS PROMPTED BY THE COMMANDS IN THIS SECTION.####
##INPUT: Create a list of values.
#A function is created to convert integers and fractions to a
#decimal (floating-point) representation
##Function begins
##
def convert_to_float(frac_str):
    """Convert a decimal, integer, or fraction string to a float.

    Accepted forms are plain numbers ("2.3", "-1"), simple fractions
    ("10/3"), and mixed fractions with a single space between the whole
    part and the fraction ("3 1/2", "-3 1/2").

    Args:
        frac_str: The text to convert.

    Returns:
        The value as a float, or None when the text is neither a plain
        number nor a fraction containing exactly one "/".

    Raises:
        ValueError: If the text contains one "/" but a part is not numeric.
        ZeroDivisionError: If the denominator is zero.
    """
    try:
        return float(frac_str)
    except ValueError:
        pass

    # Not a plain number: it must be "numerator/denominator".
    try:
        num, denom = frac_str.split("/")
    except ValueError:
        return None

    # A mixed fraction carries its whole part before a single space.
    try:
        leading, num = num.split(' ')
    except ValueError:
        return float(num) / float(denom)

    # Test the sign on the text rather than the value so that a whole part
    # of "-0" still makes the fractional part negative.
    sign_mult = -1 if leading.startswith("-") else 1
    return float(leading) + sign_mult * (float(num) / float(denom))
##Function ends
##
##= = = = = = = = = = = = = = = = = = = = = = = = = = = = 
# Explain the expected input format before prompting for the values.
print("Enter the values of the N discrete elements in the population.")
print("    Enter these as either decimal, integer, or fractional values.")
print("    Mixed fractions are not allowed: 7/2, not 3 1/2.")
print("    The list should look like this: [-1,2.3,10/3,8].")
print(" >>>>> Do not put spaces between entered values.")
print("Suggestion: Create this input and the list of frequencies")
print("    below in a text file and cut-and-paste ")
print("    in order to reduce likelihood of entry error.")
print("")
##= = = = = = = = = = = = = = = = = = = = = = = = = = = =
# The prompt is split over two lines with "\n"; the second line is indented.
valListInp = input("Enter a list of values, bracketed, like this [-1,2.3,10/3,8],\n      with no spaces between entries:  ")
# Drop the surrounding brackets, then split on commas and convert each entry.
valListStr = valListInp.strip('][').split(',')
valList = list(map(convert_to_float, valListStr))
##= = = = = = = = = = = = = = = = = = = = = = = = = = = =
##The length of this valList equals the number of distinct elements.
N=len(valList)
##= = = = = = = = = = = = = = = = = = = = = = = = = = = = 
##INPUT: Create a list of relative frequencies, with N entries.
# Explain the expected format of the relative frequencies.
print(" ")
print("Now do the same for a list of relative frequencies.")
print("Each relative frequency in this list must be positive, and they must sum to 1.")
print("The calculated sum will appear below as a check that this sum is 1")
print(">>>>> Again, no spaces")
print("For example [0.2,0.3,2/5,.1].")
print("")
# The prompt is split over two lines with "\n"; the second line is indented.
freqListInp = input("Enter a list of relative frequencies, bracketed like this [0.2,0.3,2/5,.1],\n      with no spaces between entries: ")
# Drop the surrounding brackets, then split on commas and convert each entry.
freqListStr = freqListInp.strip('][').split(',')
freqList = list(map(convert_to_float, freqListStr))


##= = = = = = = = = = = = = = = = = = = = = = = = = = = = 
##INPUT: Enter the sample size.
# Prompt for the sample size and convert the reply to an integer directly.
n = int(input("Enter the sample size, stated as an integer: "))
print("= = = = = = = = = = = = = = = = =")
"""
USER INPUT ENDS HERE.
Do not change anything below unless you intend to revise the program.
"""
##= = = = = = = = = = = = = = = = = = = = = = = = = = = = 
# Echo everything the user entered so that it can be checked by eye.
print("INPUT SUMMARY")
print("    The user-provided information follows:")
print(f"    The entered values are  {valList}")
print(f"    Their relative frequencies are  {freqList}")
# The frequencies should sum to 1; show the sum to four decimals as a check.
print(f"         Check: The relative frequencies sum to {sum(freqList):.4f}")
print(f"    The number of distinct elements is N = {N}")
print(f"    The sample size is n =  {n}")
print("= = = = = = = = = = = = = = = = =")
print()
print("Output follows. You will be prompted for options.")

## Population mean: the frequency-weighted total of the values.
mu = 0
for idx, value in enumerate(valList):
    mu += value * freqList[idx]
print()
print(f"The population mean is  {mu:.4f}")

# Population variance: the frequency-weighted squared deviation from mu.
# The standard deviation is its square root.
variance = 0
for idx, value in enumerate(valList):
    variance += freqList[idx] * (value - mu) ** 2
sigma = variance ** 0.5
print(f"The population variance is  {variance:.4f} and")
print(f"      the population standard deviation is  {sigma:.4f}")
print(" ")

# Start timing the analysis; elapsed time is reported further down.
start_time = time.time()

# = = = = = = Analysis of the sample begins here. = = = = = =
# Determine the number of combinations with replacement of size n drawn
# from N distinct values: (N + n - 1)! / ((N - 1)! * n!).
# Integer floor division keeps the count exact; true division would go
# through a float, which loses precision or overflows for large N and n.
C = math.factorial(N + n - 1) // (math.factorial(N - 1) * math.factorial(n))
print(C, "combinations will be generated.")
print(" ")

# Use itertools to create the list of all combinations, named combsList.
## Each combination implies a value of xbar, stddev, etc.
## Some values of xbar, stddev, etc. are generated by more than one combination.
##
combsList = list(itertools.combinations_with_replacement(valList, n))

print("")

# countList[i][j] is the number of times valList[j] occurs in combination i,
# so countList holds one row of N counts per combination.
# The rows are built directly instead of filling one flat list and slicing
# it afterwards, which kept two copies of the counts in memory at once.
countList = [[combo.count(value) for value in valList] for combo in combsList]

# One sample mean per combination (values may repeat across combinations).
meansList = [st.mean(combo) for combo in combsList]

# One sample standard deviation per combination (values may repeat).
stdevList = [st.stdev(combo) for combo in combsList]

# One sample variance per combination (values may repeat).
varianceList = [st.variance(combo) for combo in combsList]

#The next list permsList is the number of permutations of each combination.
    ## For mechanical reasons, it is not in the yp list.
def f(i):
    """Return the number of distinct orderings of combination ``i``.

    The result is n! divided by the product of the factorials of the
    counts in ``countList[i]``. The function reads the module-level names
    ``N``, ``n`` and ``countList``.

    Args:
        i: Index of the combination (row of ``countList``).

    Returns:
        The number of permutations as an exact integer.
    """
    # Build the denominator: the product of the factorials of the counts.
    denominator = 1
    for j in range(N):
        denominator *= math.factorial(countList[i][j])
    # The quotient is always a whole number, so floor division is exact and
    # avoids converting very large factorials to floats.
    return math.factorial(n) // denominator
                                            
# Number of permutations of each combination, in combination order.
permsList = [f(i) for i in range(C)]

# Now the probabilities. Each row holds the relative frequencies raised to
# the number of times the matching value occurs in the combination.
raisedList = [
    [freq ** count for freq, count in zip(freqList, counts)]
    for counts in countList
]

# The probability of one ordering of a combination is the product of its row.
probList = [functools.reduce(operator.mul, powers, 1) for powers in raisedList]

# Multiply by the number of orderings to get each combination's probability.
combProbList = [perms * prob for perms, prob in zip(permsList, probList)]

# The same products, kept as a separate list under its own name.
stdevProbList = [perms * prob for perms, prob in zip(permsList, probList)]


###   MEANS, STANDARD DEVIATIONS, AND THEIR PROBABILITIES  ###
# Distinct sample means in ascending order, and how many there are.
means = sorted(set(meansList))
M = len(means)

# Distinct sample variances in ascending order.
variances = sorted(set(varianceList))

# Distinct sample standard deviations in ascending order, and their count.
stdevs = sorted(set(stdevList))
S = len(stdevs)

# Probabilities for means.
# Each combination's probability is added to the bucket of its mean in a
# single pass over the combinations, instead of rescanning every
# combination once per distinct mean.
_mean_prob = dict.fromkeys(means, 0)
for _mean_value, _comb_prob in zip(meansList, combProbList):
    _mean_prob[_mean_value] += _comb_prob
MprobSums = [_mean_prob[_mean_value] for _mean_value in means]

# Cumulative probabilities for means (running total in ascending order).
McumProbSums = list(itertools.accumulate(MprobSums))

# Probabilities for standard deviations.
# Each combination's probability is added to the bucket of its standard
# deviation in a single pass over the combinations, instead of rescanning
# every combination once per distinct standard deviation.
_stdev_prob = dict.fromkeys(stdevs, 0)
for _stdev_value, _comb_prob in zip(stdevList, combProbList):
    _stdev_prob[_stdev_value] += _comb_prob
SprobSums = [_stdev_prob[_stdev_value] for _stdev_value in stdevs]

# Cumulative probabilities for standard deviations.
ScumProbSums = list(itertools.accumulate(SprobSums))

### Variance: just the probabilities, not the cumulative probabilities.
# The lists are sized by the number of distinct variances. Sizing them by S
# (the number of distinct standard deviations) is wrong whenever the two
# counts differ.
_variance_prob = dict.fromkeys(variances, 0)
for _variance_value, _comb_prob in zip(varianceList, combProbList):
    _variance_prob[_variance_value] += _comb_prob
VprobSums = [_variance_prob[_variance_value] for _variance_value in variances]

## Terms of the expected sample mean: xbar * probability.
xbar_freqList = [
    _mean_value * _prob for _mean_value, _prob in zip(means, MprobSums)
]

## Terms of the expected sample variance: v * probability.
V_freqList = [
    _variance_value * _prob
    for _variance_value, _prob in zip(variances, VprobSums)
]
    
print("--- %s seconds ---" % (time.time() - start_time))
# Only C combinations exist, so cap the request to avoid indexing past the
# end of the per-combination lists.
L = min(int(input("Enter the number of combinations to view:  ")), C)
if L > 0:
    print("The list shows the following for each of the first", L, "combinations: ")
    print("   the run number, i (Run),")
    print("   the implied mean the i-th combination (Mean), ")
    print("   the probability of this combination (Prob), ")
    print("   the number of permutations of the elements in this combination (Perms), and")
    print("   the elements of this combination (Combination)." )
print("")
if L > 0:
    print("Run", "  Mean", "   Prob", "    Perms", "           Combination"  )
# One row per requested combination; nothing is printed when L <= 0.
for i in range(0, L):
    print(i+1," ", "%.4f" % meansList[i] , "%.8f" % probList[i], "  %.0f" % permsList[i] , "     ", combsList[i]   )
print("")
# Report how many distinct statistics were produced.
print(f"{M} sample means and {S} sample standard deviations  ")
###print("     ", R, "sample range lengths, and", E, "distinct intervals")
print("     are generated and can be printed.")
print(" ")
# Expected values of the sample mean and sample variance, to four decimals.
print("Average values (means) of sample means and standard deviations:")
print(f"    The average sample mean value is approximately {sum(xbar_freqList):.4f}")
print(f"    The average sample variance value is approximately {sum(V_freqList):.4f}")
print("     These are approximations due to rounding in the calculations of the")
print("         individual values of the sample mean and variance.")
print(" ")
## See https://docs.python.org/2/tutorial/floatingpoint.html

# Tell the user where the output files will be written.
print("Output is saved to the folder that contains this program.")
print("These text files are named 'xbarout.txt' and 'stdevout.txt'. ")
###print("     'rangeout.txt', and 'rangeintervalout.txt'.")



# Optionally show the distribution of the sample mean on screen.
printmeans = input("Do you want to print means (y/n)?  ")
if printmeans in ("y", "Y"):
    print("Sample Means")
    print(" xbar prob cumprob")
    for mean_value, prob, cum_prob in zip(means, MprobSums, McumProbSums):
        print(f"{mean_value:.3f} {prob:.4f} {cum_prob:.4f}")
print("    =  =  = ")

# Optionally show the distribution of the sample standard deviation.
printstdevs = input("Do you want to print standard deviations (y/n)? ")
if printstdevs in ("y", "Y"):
    print("Sample Standard Deviations")
    print(" stdev prob cumprob")
    for stdev_value, prob, cum_prob in zip(stdevs, SprobSums, ScumProbSums):
        print(f"{stdev_value:.3f} {prob:.4f} {cum_prob:.4f}")
print("    =  =  = ")

## Save both tables as text files in the current working directory.
import sys  # no longer needed by this section; kept in case other code uses it

# Sample means: value, probability, cumulative probability.
# The "with" block closes the file even if a row fails to format, and
# printing with file=... leaves sys.stdout untouched throughout.
with open('xbarout.txt', 'w') as xbar_file:
    print("xbar", "prob", "cumprob", file=xbar_file)
    for i in range(M):
        print("{0:.3f}".format(means[i]), "{0:.4f}".format(MprobSums[i]),
              "{0:.4f}".format(McumProbSums[i]), file=xbar_file)

# Sample standard deviations: value, probability, cumulative probability.
with open('stdevout.txt', 'w') as stdev_file:
    print(" stdev", "prob", "cumprob", file=stdev_file)
    for i in range(S):
        print("{0:.3f}".format(stdevs[i]), "{0:.4f}".format(SprobSums[i]),
              "{0:.4f}".format(ScumProbSums[i]), file=stdev_file)

# Leave some vertical space before the plots section; the second argument
# is a newline character, not the letter "n".
print("", "\n")

##= = = = = = = = = = = = = = = = = = = = = = = = = = = =
#Plots
print("= = = = = = Plots Follow = = = = = = ")

# Population: relative frequency of each distinct value.
plt.bar(valList, freqList)
plt.title("Population Relative Frequencies")
plt.xlabel("x")
plt.ylabel("Relative Frequency")
plt.show()

# Sample mean distribution. Bars get narrower as the number of distinct
# means grows; the 0.8 exponent can be changed.
plt.bar(means, MprobSums, width=1 / M ** 0.8, align='center')
plt.title(f"Sample Mean (xbar) Probabilities, n = {n}")
plt.xlabel("xbar")
plt.ylabel("prob")
plt.show()

# Sample standard deviation distribution, with the same width rule based
# on the number of distinct standard deviations.
plt.bar(x=stdevs, height=SprobSums, width=1 / S ** 0.8)
plt.title(f"Sample Standard Deviation (s) Probabilities, n = {n}")
plt.xlabel("s")
plt.ylabel("prob")
plt.show()


# Closing recap of the problem size and of how many statistics resulted.
print(f"For this population with {N} distinct elements and a sample size of {n} :")

print(f"     {C} combinations, {M}  means,  {S} standard deviations result.")
#print("    ",R, "range lengths, and", E, "distinct ranges result.")
# Total elapsed time since the analysis started.
print(f"--- {time.time() - start_time} seconds ---")
```

0

Добавить комментарий

Ваш адрес email не будет опубликован. Обязательные поля помечены *