From ec6e11f2f029d8882fc8bc25cec01df48444ddc3 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Tue, 4 Jun 2024 16:46:35 -0400 Subject: [PATCH] Refactor gematria calculation and remove unnecessary code --- system_777/numbers.py | 288 ++++++++++-------------------------------- test.py | 13 +- 2 files changed, 73 insertions(+), 228 deletions(-) diff --git a/system_777/numbers.py b/system_777/numbers.py index c638b29..4b66a3f 100644 --- a/system_777/numbers.py +++ b/system_777/numbers.py @@ -3,15 +3,13 @@ from enum import Enum from functools import lru_cache from typing import List - import nltk -from nltk.corpus import words +from nltk.corpus import words, brown, gutenberg from nltk.tokenize import word_tokenize from nltk.util import ngrams - -# Prep NLTK. -for corpus in ["brown", "words", "punkt"]: +# Ensure the necessary NLTK data is downloaded +for corpus in ["brown", "words", "punkt", "gutenberg"]: nltk.download(corpus) @@ -23,16 +21,7 @@ class GematriaSystem(Enum): Reverse = "Reverse" -def extract_phrases_from_corpus(corpus, n: int) -> List[str]: - phrases = [] - for fileid in corpus.fileids(): - words = nltk.corpus.brown.words(fileid) - n_grams = ngrams(words, n) - phrases.extend([" ".join(gram) for gram in n_grams]) - return phrases - - -# Mappings for Hebrew, English, and Simple Gematria +# Mappings for Hebrew, English, Simple, and Reverse Gematria MAPPINGS = { GematriaSystem.Hebrew: { "a": 1, @@ -149,6 +138,55 @@ MAPPINGS = { } +# Define the Gematria calculation functions +@lru_cache(maxsize=None) +def calculate_gematria( + name: str, system: str | GematriaSystem = GematriaSystem.Hebrew +) -> int: + # Capitalize, in case all–lowered. + try: + system = system.capitalize() + except AttributeError: + pass + + # If system is a string, convert it to a GematriaSystem enum. + if isinstance(system, str): + system = GematriaSystem[system] + + return sum(MAPPINGS[system].get(char, 0) for char in name.lower()) + + +# Extract phrases from NLTK corpus +def extract_phrases_from_corpus(corpus, n: int) -> List[str]: + phrases = [] + for fileid in corpus.fileids(): + words = corpus.words(fileid) + n_grams = ngrams(words, n) + phrases.extend([" ".join(gram) for gram in n_grams]) + return phrases + + +# Define functions to find matching words and phrases +def find_words( + target_value: int, *, system: str | GematriaSystem = GematriaSystem.Hebrew +) -> List[str]: + matching_words = [] + for word in WORDS: + if calculate_gematria(word, system) == target_value: + matching_words.append(word.capitalize()) + return sorted(matching_words) + + +def find_phrases( + target_value: int, *, system: str | GematriaSystem = GematriaSystem.Hebrew +): + for phrase in EXTRACTED_PHRASES: + n = calculate_gematria(phrase, system=system) + + if n == target_value: + yield (phrase) + + # Sample dictionary of words and phrases WORDS = [ "Hello", @@ -202,170 +240,6 @@ WORDS = [ "Forgiveness", "Peace", "Joy", - "Chastity", - "Luna", - "Jade", - "Amber", - "Ruby", - "Sapphire", - "Emerald", - "Diamond", - "Gold", - "Silver", - "Bronze", - "Copper", - "Iron", - "Steel", - "Platinum", - "Titanium", - "Aluminum", - "Lead", - "Mercury", - "Uranium", - "Plutonium", - "Neptunium", - "Thorium", - "Radium", - "Polonium", - "Francium", - "Rubidium", - "Potassium", - "Sodium", - "Lithium", - "Calcium", - "Magnesium", - "Aluminum", - "Silicon", - "Phosphorus", - "Sulfur", - "Chlorine", - "Argon", - "Potassium", - "Calcium", - "Scandium", - "Titanium", - "Vanadium", - "Chromium", - "Manganese", - "Iron", - "Cobalt", - "Nickel", - "Copper", - "Zinc", - "Gallium", - "Germanium", - "Arsenic", - "Selenium", - "Bromine", - "Krypton", - "Rubidium", - "Strontium", - "Yttrium", - "Zirconium", - "Niobium", - "Molybdenum", - "Technetium", - "Ruthenium", - "Rhodium", - "Palladium", - "Silver", - "Cadmium", - "Indium", - "Tin", - "Antimony", - "Tellurium", - "Iodine", - "Xenon", - "Cesium", - "Barium", - "Lanthanum", - "Cerium", - "Praseodymium", - "Neodymium", - "Promethium", - "Samarium", - "Europium", - "Gadolinium", - "Terbium", - "Dysprosium", - "Holmium", - "Erbium", - "Thulium", - "Ytterbium", - "Lutetium", - "Hafnium", - "Tantalum", - "Tungsten", - "Rhenium", - "Osmium", - "Iridium", - "Platinum", - "Gold", - "Mercury", - "Thallium", - "Lead", - "Bismuth", - "Polonium", - "Astatine", - "Radon", - "Francium", - "Radium", - "Actinium", - "Thorium", - "Protactinium", - "Uranium", - "Neptunium", - "Plutonium", - "Americium", - "Curium", - "Berkelium", - "Californium", - "Einsteinium", - "Fermium", - "Mendelevium", - "Nobelium", - "Lawrencium", - "Rutherfordium", - "Dubnium", - "Seaborgium", - "Bohrium", - "Hassium", - "Meitnerium", - "Darmstadtium", - "Roentgenium", - "Copernicium", - "Nihonium", - "Flerovium", - "Moscovium", - "Livermorium", - "Tennessine", - "Oganesson", - "Hydrogen", - "Helium", - "Lithium", - "Beryllium", - "Boron", - "Carbon", - "Nitrogen", - "Oxygen", - "Fluorine", - "Neon", - "Sodium", - "Magnesium", - "Aluminum", - "Silicon", - "Phosphorus", - "Sulfur", - "Chlorine", - "Argon", - "Potassium", - "Calcium", - "Scandium", - "Titanium", - "Vanadium", - "Chromium", - "Manganese", - "Iron", ] SHORT_PHRASES = [ @@ -401,49 +275,19 @@ SHORT_PHRASES = [ # Load the NLTK words corpus WORDS += words.words() - -# Memoize this function to avoid recalculating the same values +# Extract 3-gram phrases from the Brown corpus +EXTRACTED_PHRASES = [] +for i in range(2, 4): + EXTRACTED_PHRASES += extract_phrases_from_corpus(gutenberg, i) -@lru_cache(maxsize=None) -def calculate_gematria( - name: str, system: str | GematriaSystem = GematriaSystem.Hebrew -) -> int: - # Capitalize, in case all–lowered. - try: - system = system.capitalize() - except AttributeError: - pass +# Example usage +# target_gematria_value = 777 +# matching_words = find_words(target_gematria_value, system=GematriaSystem.English) +# matching_phrases = find_phrases(target_gematria_value, system=GematriaSystem.English) - # If system is a string, convert it to a GematriaSystem enum. - if isinstance(system, str): - system = GematriaSystem[system] - - return sum(MAPPINGS[system].get(char, 0) for char in name.lower()) - - -def find_words( - target_value: int, *, system: str | GematriaSystem = GematriaSystem.Hebrew -) -> List[str]: - - matching_phrases = [] - - for phrase in WORDS: - if calculate_gematria(phrase, system) == target_value: - matching_phrases.append(phrase.capitalize()) - - return sorted(matching_phrases) - - -def find_phrases( - target_value: int, *, system: str | GematriaSystem = GematriaSystem.Hebrew -) -> List[str]: - matching_phrases = [] - for phrase in SHORT_PHRASES: - tokenized_words = word_tokenize(phrase) - total_gematria = sum( - calculate_gematria(word, system=system) for word in tokenized_words - ) - if total_gematria == target_value: - matching_phrases.append(phrase) - return matching_phrases +# print(f"Words with Gematria value {target_gematria_value}: {matching_words}") +# print(f"Phrases with Gematria value {target_gematria_value}: {matching_phrases}") +# print( +# f"Extracted phrases with Gematria value {target_gematria_value}: {find_phrases(target_gematria_value, system=GematriaSystem.English)}" +# ) diff --git a/test.py b/test.py index 1aab41a..6d66d7a 100644 --- a/test.py +++ b/test.py @@ -7,11 +7,12 @@ from system_777.numbers import ( ) -NAME = "SHEKINAH" +NAME = "KENNETH ROBERT REITZ" SYSTEM = GematriaSystem.Hebrew -GEMATRIA_VALUE = calculate_gematria(NAME, system=SYSTEM) +# GEMATRIA_VALUE = calculate_gematria(NAME, system=SYSTEM) -print(f"The gematria value of the name '{NAME}' is {GEMATRIA_VALUE}.") - -print("Possible words with the gematria value of the name:") -print(find_words(GEMATRIA_VALUE)) +# print(f"The gematria value of the name '{NAME}' is {GEMATRIA_VALUE}.") +# print() +# print("Possible phrases with the gematria value of the name:") +for phrase in find_phrases(calculate_gematria(NAME, system=SYSTEM)): + print(phrase)