/* Copyright Grant Rostig, License Boost 1.0 Bad name, should be "permutations" (not "combinations"), also the selection is considered to be "with replacement" */ #include #include #include #include #include constexpr int STRING_LEN{49}; auto s(auto i) {return i/2.09;} // s: Scale down the value by some factor with goal of having all requencies sum to 100% or probability of 1, or thereabouts. std::unordered_map char_occurance_freqs = { // First 26 values are the letter frequencies in English (in percentage), but we added upper case and more, so number must be scaled down with s(). {'a', s(8.167)}, {'b', s(1.492)}, {'c', s(2.782)}, {'d', s(4.253)}, {'e', s(12.702)}, {'f', s(2.228)}, {'g', s(2.015)}, {'h', s(6.094)}, {'i', s(6.966)}, {'j', s(0.153)}, {'k', s(0.772)}, {'l', s(4.025)}, {'m', s(2.406)}, {'n', s(6.749)}, {'o', s(7.507)}, {'p', s(1.929)}, {'q', s(0.095)}, {'r', s(5.987)}, {'s', s(6.327)}, {'t', s(9.056)}, {'u', s(2.758)}, {'v', s(0.978)}, {'w', s(2.360)}, {'x', s(0.150)}, {'y', s(1.974)}, {'z', s(0.074)}, {'A', s(8.167)}, {'B', s(1.492)}, {'C', s(2.782)}, {'D', s(4.253)}, {'E', s(12.702)}, {'F', s(2.228)}, {'G', s(2.015)}, {'H', s(6.094)}, {'I', s(6.966)}, {'J', s(0.153)}, {'K', s(0.772)}, {'L', s(4.025)}, {'M', s(2.406)}, {'N', s(6.749)}, {'O', s(7.507)}, {'P', s(1.929)}, {'Q', s(0.095)}, {'R', s(5.987)}, {'S', s(6.327)}, {'T', s(9.056)}, {'U', s(2.758)}, {'V', s(0.978)}, {'W', s(2.360)}, {'X', s(0.150)}, {'Y', s(1.974)}, {'Z', s(0.074)}, {'2', s(0.999)}, {'3', s(0.999)}, {'4', s(0.999)}, {'5', s(0.999)}, {'6', s(0.999)}, {'7', s(0.999)}, }; int main() { // std::unordered_map const letterProbabilities; double character_set_radix{static_cast(ssize(char_occurance_freqs))}; double log_x{log2(character_set_radix)}; double possible_permutations_with_replacement = pow(character_set_radix, STRING_LEN); // Total possible combinations without considering probabilities double total_frequency{0.0}; for (auto const & pair : char_occurance_freqs) { total_frequency += pair.second; } double entropy{0.0}; // Shannon entropy to be calculated, apparently only an estimate. for (const auto& pair : char_occurance_freqs) { double probability = pair.second / total_frequency; // letterProbabilities[pair.first] = probability; assert(probability > 0.0); entropy -= probability * log2(probability); } double total_entropy = STRING_LEN * entropy; double effective_permutations = pow(2, total_entropy); std::cout << "Radix or number of chars in set: " << character_set_radix << std::endl; std::cout << "Total Frequency of chars: " << total_frequency << ". Should 100." << std::endl; std::cout << "Bits needed for radix: " << log_x << std::endl; std::cout << "Entropy per char in random English text frequency on average:" << entropy << " bits" << std::endl; std::cout << "Total entropy for char permutations of length "<< STRING_LEN <<": "<< total_entropy << " bits" << std::endl; std::cout << "Effective number of char permutations: " << std::scientific << std::setprecision(8) << effective_permutations << std::endl; std::cout << "Total possible permutations " << character_set_radix <<"^"<