Spaces:
Build error
Build error
Commit
·
1c56d55
1
Parent(s):
a3679d8
Upload folder using huggingface_hub
Browse files- .ipynb_checkpoints/app-checkpoint.py +159 -0
- .ipynb_checkpoints/util-checkpoint.py +69 -0
- README.md +3 -9
- app.py +159 -0
- corpus/1.txt +73 -0
- util.py +69 -0
.ipynb_checkpoints/app-checkpoint.py
ADDED
@@ -0,0 +1,159 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Load our data
import os

def load_raw_text(corpus_directory: str, file_names=None) -> str:
    """Load all the ``.txt`` files in a directory into one large string.

    Args:
        corpus_directory: Directory to scan (not recursive).
        file_names: Optional collection of file names; when given, only those
            files are read (previously this parameter was silently ignored).

    Returns:
        The concatenated file contents, with a newline appended per file.
    """
    corpus = ""

    for file_name in os.listdir(corpus_directory):
        file_path = os.path.join(corpus_directory, file_name)
        if os.path.isdir(file_path):
            continue

        # Only read text files; endswith avoids matching e.g. "notes.txt.bak".
        if not file_name.endswith(".txt"):
            continue

        # Honor an explicit file whitelist when one is supplied.
        if file_names is not None and file_name not in file_names:
            continue

        # Explicit encoding so behavior does not depend on the platform default.
        with open(file_path, 'r', encoding='utf-8') as file:
            corpus += file.read() + "\n"
    return corpus
# REPLACE WITH YOUR CORPUS DIRECTORY
corpus = load_raw_text(corpus_directory="./corpus")

import re
import util

# NOTE(review): util.strip_accents is never applied here; the token regex
# below whitelists ï/ë/ñ instead — confirm that is intended.

# Normalize case before tokenizing.
corpus = corpus.lower()

# Tokens are runs of lowercase letters plus the Karijona diacritics.
word_regex = r"[a-zïëñ]+"
def tokenize(text: str):
    return re.findall(word_regex, text)

s_tok = tokenize(corpus)

# All unique words in the corpus.
lexicon = set(s_tok)

# Keep only words of a playable length (3-7 letters) for the game.
filtered_lexicon = {word for word in lexicon if 3 <= len(word) <= 7}
import random

def random_scramble(lexicon: set):
    """Pick a random word from the lexicon and return it scrambled.

    Returns a dict with keys 'shuffled' (the scrambled word) and
    'original' (the word that was picked).
    """
    word = random.choice(list(lexicon))

    # Shuffle the word's characters and glue them back into a string.
    letters = list(word)
    random.shuffle(letters)
    return {'shuffled': ''.join(letters), 'original': word}
70 |
+
import gradio as gr
|
71 |
+
from typing import Tuple
|
72 |
+
|
73 |
+
def create_hangman_clue(word, guessed_letters):
    """
    Given a word and the set of guessed letters, build the clue string.

    For instance, if the word is 'apple' and the guessed letters are 'a' and
    'l', the clue is 'a _ _ l _'.
    """
    # join() puts spaces only between cells, so there is no trailing space —
    # the original appended "letter + ' '" and contradicted its own docstring.
    return ' '.join(letter if letter in guessed_letters else '_'
                    for letter in word)
86 |
+
|
87 |
+
|
88 |
+
def pick_new_word(lexicon):
    """Start a fresh game state: a random word, no guesses, six chances."""
    candidates = list(lexicon)
    new_state = {
        'word': random.choice(candidates),
        'guessed_letters': set(),
        'remaining_chances': 6,
    }
    return new_state
96 |
+
|
97 |
+
|
98 |
+
def hangman_game(current_state, guess):
    """Update the current state based on the guess.

    Returns a (state, message) tuple; on win/lose a fresh state is drawn
    from the module-level filtered_lexicon.
    """
    # A guess must be a single, not-yet-tried letter. len(guess) != 1 also
    # rejects the empty string, which previously slipped through ('' is a
    # substring of every word, so it was scored as a correct guess).
    if len(guess) != 1 or guess in current_state['guessed_letters']:
        # Illegal guess, do nothing
        return (current_state, 'Invalid guess')

    current_state['guessed_letters'].add(guess)

    if guess not in current_state['word']:
        # Wrong guess
        current_state['remaining_chances'] -= 1

        if current_state['remaining_chances'] == 0:
            # No more chances! New word
            return (pick_new_word(filtered_lexicon), 'You lose!')
        return (current_state, 'Wrong guess :(')

    # Right guess: if any letter is still hidden, keep playing.
    for letter in current_state['word']:
        if letter not in current_state['guessed_letters']:
            return (current_state, 'Correct guess!')

    # Every letter is revealed: start a new round.
    return (pick_new_word(filtered_lexicon), 'You win!')
129 |
+
|
130 |
+
|
131 |
+
def state_changed(current_state):
    """Project the game state into the three UI fields (clue, guesses, chances)."""
    return (
        create_hangman_clue(current_state['word'], current_state['guessed_letters']),
        current_state['guessed_letters'],
        current_state['remaining_chances'],
    )
136 |
+
|
137 |
+
|
138 |
+
with gr.Blocks(theme=gr.themes.Soft(), title="karijona Hangman") as hangman:
    # Per-session game state (word, guessed letters, remaining chances).
    current_word = gr.State(pick_new_word(filtered_lexicon))

    gr.Markdown("# karijona Hangman")

    with gr.Row():
        current_word_textbox = gr.Textbox(
            label="Clue",
            interactive=False,
            value=create_hangman_clue(current_word.value['word'],
                                      current_word.value['guessed_letters']))
        guessed_letters_textbox = gr.Textbox(label="Guessed letters", interactive=False)
        remaining_chances_textbox = gr.Textbox(label="Remaining chances", interactive=False, value=6)

    guess_textbox = gr.Textbox(label="Guess")
    guess_button = gr.Button(value="Submit")

    output_textbox = gr.Textbox(label="Result", interactive=False)

    # First update the state from the guess, then refresh every widget
    # from the (possibly brand-new) state.
    guess_button.click(fn=hangman_game,
                       inputs=[current_word, guess_textbox],
                       outputs=[current_word, output_textbox])\
        .then(fn=state_changed,
              inputs=[current_word],
              outputs=[current_word_textbox, guessed_letters_textbox, remaining_chances_textbox])

hangman.launch()
.ipynb_checkpoints/util-checkpoint.py
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
import re
import unicodedata

def strip_accents(text: str) -> str:
    """Remove accents from text (NFD-decompose, then drop combining marks)."""
    decomposed = unicodedata.normalize('NFD', text)
    return ''.join(ch for ch in decomposed
                   if unicodedata.category(ch) != 'Mn')
10 |
+
|
11 |
+
def load_raw_text(corpus_directory: str, file_names=None) -> str:
    """Load all the ``.txt`` files in a directory into one large string.

    Args:
        corpus_directory: Directory to scan (not recursive).
        file_names: Optional collection of file names; when given, only those
            files are read (previously this parameter was silently ignored).

    Returns:
        The concatenated file contents, with a newline appended per file.
    """
    corpus = ""

    for file_name in os.listdir(corpus_directory):
        file_path = os.path.join(corpus_directory, file_name)
        if os.path.isdir(file_path):
            continue

        # Only read text files; endswith avoids matching e.g. "notes.txt.bak".
        if not file_name.endswith(".txt"):
            continue

        # Honor an explicit file whitelist when one is supplied.
        if file_names is not None and file_name not in file_names:
            continue

        # Explicit encoding so behavior does not depend on the platform default.
        with open(file_path, 'r', encoding='utf-8') as file:
            corpus += file.read() + "\n"
    return corpus
29 |
+
|
30 |
+
|
31 |
+
# NOTE: inside a character class '|' is a literal pipe, not alternation, so
# the original r"[\w|\']+" wrongly kept '|' inside tokens (e.g. "a|b").
word_regex = r"[\w']+"
def tokenize(text):
    """Split text into tokens of word characters and apostrophes."""
    return re.findall(word_regex, text)
34 |
+
|
35 |
+
|
36 |
+
def preprocess(text):
    """Tokenizes and processes text which is already separated by spaces into words. Designed for English punctuation."""
    lowered = strip_accents(text).lower()

    kept = []
    for candidate in lowered.split(" "):
        # Keep tokens that look like words or punctuation.
        # NOTE(review): re.match only anchors at the start, so a token merely
        # *starting* with a word character passes this filter — confirm intent.
        if re.match(r"[\w|\']+|[\.|\,|\?|\!]", candidate):
            kept.append(candidate)
    return kept
49 |
+
|
50 |
+
|
51 |
+
def pad(text: list, num_padding: int):
    """Pads the given text, as a list of strings, with <s> markers between sentences."""
    start_markers = ["<s>"] * num_padding

    # Initial padding before the first sentence.
    padded_text = list(start_markers)

    for token in text:
        padded_text.append(token)
        # After each sentence-final punctuation mark, pad again so the next
        # sentence also starts with num_padding <s> tokens.
        # REPLACE IF YOUR LANGUAGE USES DIFFERENT END PUNCTUATION
        if token in [".", "?", "!"]:
            padded_text.extend(start_markers)

    return padded_text
README.md
CHANGED
@@ -1,12 +1,6 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji: ⚡
|
4 |
-
colorFrom: red
|
5 |
-
colorTo: yellow
|
6 |
-
sdk: gradio
|
7 |
-
sdk_version: 3.35.2
|
8 |
app_file: app.py
|
9 |
-
|
|
|
10 |
---
|
11 |
-
|
12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
+
title: Karijona_Hangman
|
|
|
|
|
|
|
|
|
|
|
3 |
app_file: app.py
|
4 |
+
sdk: gradio
|
5 |
+
sdk_version: 3.34.0
|
6 |
---
|
|
|
|
app.py
ADDED
@@ -0,0 +1,159 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Load our data
import os

def load_raw_text(corpus_directory: str, file_names=None) -> str:
    """Load all the ``.txt`` files in a directory into one large string.

    file_names, when given, whitelists which files are read (it was
    previously ignored).
    """
    corpus = ""
    for file_name in os.listdir(corpus_directory):
        file_path = os.path.join(corpus_directory, file_name)
        if os.path.isdir(file_path):
            continue
        # endswith avoids matching names like "notes.txt.bak".
        if not file_name.endswith(".txt"):
            continue
        if file_names is not None and file_name not in file_names:
            continue
        with open(file_path, 'r', encoding='utf-8') as file:
            corpus += file.read() + "\n"
    return corpus

# REPLACE WITH YOUR CORPUS DIRECTORY
corpus = load_raw_text(corpus_directory="./corpus")

import re
import util

# NOTE(review): util.strip_accents is never applied; the token regex below
# whitelists ï/ë/ñ instead — confirm that is intended.

corpus = corpus.lower()

# Tokens are runs of lowercase letters plus the Karijona diacritics.
word_regex = r"[a-zïëñ]+"
def tokenize(text: str):
    return re.findall(word_regex, text)

s_tok = tokenize(corpus)

# All unique words in the corpus.
lexicon = set(s_tok)

# Keep only words of a playable length (3-7 letters).
filtered_lexicon = {word for word in lexicon if 3 <= len(word) <= 7}

import random

def random_scramble(lexicon: set):
    """Pick a random word and return {'shuffled': ..., 'original': ...}."""
    word = random.choice(list(lexicon))
    letters = list(word)
    random.shuffle(letters)
    return {'shuffled': ''.join(letters), 'original': word}

import gradio as gr
from typing import Tuple

def create_hangman_clue(word, guessed_letters):
    """
    Given a word and the guessed letters, build the clue string.

    For instance, if the word is 'apple' and the guessed letters are 'a' and
    'l', the clue is 'a _ _ l _'.
    """
    # join() leaves no trailing space, matching the documented format.
    return ' '.join(letter if letter in guessed_letters else '_'
                    for letter in word)

def pick_new_word(lexicon):
    """Start a fresh game state: random word, no guesses, six chances."""
    return {
        'word': random.choice(list(lexicon)),
        'guessed_letters': set(),
        'remaining_chances': 6,
    }

def hangman_game(current_state, guess):
    """Update the current state based on the guess; return (state, message)."""
    # A guess must be a single, not-yet-tried letter. len(guess) != 1 also
    # rejects the empty string, which previously scored as a correct guess
    # because '' is a substring of every word.
    if len(guess) != 1 or guess in current_state['guessed_letters']:
        return (current_state, 'Invalid guess')

    current_state['guessed_letters'].add(guess)

    if guess not in current_state['word']:
        # Wrong guess
        current_state['remaining_chances'] -= 1
        if current_state['remaining_chances'] == 0:
            # No more chances! New word
            return (pick_new_word(filtered_lexicon), 'You lose!')
        return (current_state, 'Wrong guess :(')

    # Right guess: if any letter is still hidden, keep playing.
    for letter in current_state['word']:
        if letter not in current_state['guessed_letters']:
            return (current_state, 'Correct guess!')

    # Every letter revealed: start a new round.
    return (pick_new_word(filtered_lexicon), 'You win!')

def state_changed(current_state):
    """Project the game state into the three UI fields."""
    clue = create_hangman_clue(current_state['word'], current_state['guessed_letters'])
    return (clue, current_state['guessed_letters'], current_state['remaining_chances'])

with gr.Blocks(theme=gr.themes.Soft(), title="karijona Hangman") as hangman:
    current_word = gr.State(pick_new_word(filtered_lexicon))

    gr.Markdown("# karijona Hangman")

    with gr.Row():
        current_word_textbox = gr.Textbox(
            label="Clue", interactive=False,
            value=create_hangman_clue(current_word.value['word'],
                                      current_word.value['guessed_letters']))
        guessed_letters_textbox = gr.Textbox(label="Guessed letters", interactive=False)
        remaining_chances_textbox = gr.Textbox(label="Remaining chances", interactive=False, value=6)

    guess_textbox = gr.Textbox(label="Guess")
    guess_button = gr.Button(value="Submit")

    output_textbox = gr.Textbox(label="Result", interactive=False)

    # Update the state from the guess, then refresh all widgets from it.
    guess_button.click(fn=hangman_game,
                       inputs=[current_word, guess_textbox],
                       outputs=[current_word, output_textbox])\
        .then(fn=state_changed,
              inputs=[current_word],
              outputs=[current_word_textbox, guessed_letters_textbox, remaining_chances_textbox])

hangman.launch()
corpus/1.txt
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Karijona ekarï
|
2 |
+
Karijona ekarï notonaga ëkëimë mono kërïtawënobë. Karijona echiwaketï nïjïnëjïjëbë notonaga karijona ekarï. Irajo jiyachi, kuyugo dïjïrë mëkë karijona ekarïko
|
3 |
+
esetï karama, jiyanakoto, yakauyana, werewereru, majotoyana, kaikusana, machijuyana, saja saja, namororeke nai ërarërë nesejoyanë marë. Irebë nïwowanë toto.
|
4 |
+
— Ëteke manai?
|
5 |
+
— Kure wae (ganë)?
|
6 |
+
— Ëtï mïjëkae?
|
7 |
+
— Wui tïrïyae tujitëjo
|
8 |
+
— Wïtëeja.
|
9 |
+
dëmë nesejoyanë marë, marë, marë. Irëbë niwowanë toto:
|
10 |
+
— Ëteke manatëi?
|
11 |
+
— Aña kuregïñake nai.
|
12 |
+
Ëwï yesetï nai . Yiye esetï nai Ëwï yumu esetï nai . Yakëmijë esetï gërëja nai Yinotï esetï nai Tamu esetï nai . Tïmugëake wae. Mësa mëitïto.
|
13 |
+
Karijona eremirï etase manai?
|
14 |
+
2. Ëjutujë eurukuse manai?
|
15 |
+
3. Yeremirï tae ëtëkëne tïyajoro. 4. Tïyajoro ejaragae echinemae. 5. Tïyajoro etunutëkë.
|
16 |
+
6. Nekë nechinemanë mëitïto.
|
17 |
+
Aime toto nai tïyajoro.
|
18 |
+
26. Itu tawëdoko aime nai.
|
19 |
+
27. Tetunutë ake.
|
20 |
+
28. Tejaragae tïyajoro ekayakarï.
|
21 |
+
29. Akorodoko ejaragasegërë nai.
|
22 |
+
Mësa mëitïto nai.
|
23 |
+
|
24 |
+
Ësanobë meyae?
|
25 |
+
Ësanobë wërichi neyae? Ësanobë mure neyanë? Ësanobë gïrï neyanë?
|
26 |
+
Aña tujitë tërënobë neyae. Ësanobë añamoro meyatëi? Ësanobë mëkamoro neyanë?
|
27 |
+
yitudae
|
28 |
+
mutudae
|
29 |
+
nutudae
|
30 |
+
nutudae
|
31 |
+
mutudatëi
|
32 |
+
nutudanë
|
33 |
+
Ëti jëkë manai? Ëti jëkë nërë nai? Ëti jëkë manatëi?
|
34 |
+
ëwï
|
35 |
+
ëmërë
|
36 |
+
nërë
|
37 |
+
aña
|
38 |
+
añamoro
|
39 |
+
mëkamoro
|
40 |
+
itawarï
|
41 |
+
tujitë
|
42 |
+
mïnë
|
43 |
+
Sëkënërë atakëmicha dëmë terejarïko wae: teñi jaru jëmïrï, serawërë nureimë, kënëkërëne jëmëi. Mëkë iwajotorï dëmë ikucha wae. Nërë nejï. Ëñaotoene ikucha: sëkënërë jëne, serawërë kunañi. Nai gërëja oworï iyatënë.
|
44 |
+
Teñi Sëkënërë Serawërë Kënëtëkërëne Ëñatoene
|
45 |
+
Ëwï ajereme teñi kaikuchi yeku nai. Ëwï ajereme sëkënërë nureimë nai. Ëwï ajereme serawërë kunañi nai. Ëwï ajereme kënëtëkërëne jëmëi nai. Ëwï ajereme eñatoene jaru
|
46 |
+
Ëteke manatëi? Manatëi reke?
|
47 |
+
Kure
|
48 |
+
Kure dïjïrë
|
49 |
+
Kure aña nai
|
50 |
+
Uwareke
|
51 |
+
Uwa
|
52 |
+
Mëjënae?
|
53 |
+
Mëjënuyae
|
54 |
+
Ikucha mïsakae?
|
55 |
+
Meremiruyae?
|
56 |
+
Wëiko
|
57 |
+
Jiji
|
58 |
+
Enuko Enu
|
59 |
+
Jiji
|
60 |
+
Wërï
|
61 |
+
Muchu
|
62 |
+
Tamu
|
63 |
+
Wëiko Weikorï
|
64 |
+
Jiji
|
65 |
+
Akëmijë
|
66 |
+
Yakëmijë
|
67 |
+
Iyarijarï
|
68 |
+
Yarijarï
|
69 |
+
Wajotorï
|
70 |
+
Yiwajotorï
|
71 |
+
Echirï Yechirï
|
72 |
+
Muguru
|
73 |
+
|
util.py
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os
import re
import unicodedata

def strip_accents(text: str) -> str:
    """Removes accents from text (NFD-decompose, drop combining marks)."""
    return ''.join(c for c in unicodedata.normalize('NFD', text)
                   if unicodedata.category(c) != 'Mn')


def load_raw_text(corpus_directory: str, file_names=None) -> str:
    """Loads all the ``.txt`` files in a directory into one large string.

    file_names, when given, whitelists which files are read (it was
    previously ignored).
    """
    corpus = ""
    for file_name in os.listdir(corpus_directory):
        file_path = os.path.join(corpus_directory, file_name)
        if os.path.isdir(file_path):
            continue
        # endswith avoids matching names like "notes.txt.bak".
        if not file_name.endswith(".txt"):
            continue
        if file_names is not None and file_name not in file_names:
            continue
        with open(file_path, 'r', encoding='utf-8') as file:
            corpus += file.read() + "\n"
    return corpus


# NOTE: inside a character class '|' is a literal pipe, not alternation, so
# the original r"[\w|\']+" wrongly kept '|' inside tokens (e.g. "a|b").
word_regex = r"[\w']+"
def tokenize(text):
    """Split text into tokens of word characters and apostrophes."""
    return re.findall(word_regex, text)


def preprocess(text):
    """Tokenizes and processes text which is already separated by spaces into words. Designed for English punctuation."""
    text = strip_accents(text)
    text = text.lower()

    tokens = text.split(" ")

    tokens_filtered = []
    for token in tokens:
        # Keep tokens that start with a word character/apostrophe or with
        # punctuation (re.match anchors only at the start of the token).
        if re.match(r"[\w']+|[.,?!]", token):
            tokens_filtered.append(token)
    return tokens_filtered


def pad(text: list, num_padding: int):
    """Pads the given text, as a list of strings, with <s> markers between sentences."""
    padded_text = ["<s>"] * num_padding

    for word in text:
        padded_text.append(word)
        # After every sentence-final punctuation mark, pad again.
        # REPLACE IF YOUR LANGUAGE USES DIFFERENT END PUNCTUATION
        if word in [".", "?", "!"]:
            padded_text.extend(["<s>"] * num_padding)

    return padded_text
|