michaelginn committed
Commit 1c56d55 · 1 Parent(s): a3679d8

Upload folder using huggingface_hub
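For context, a folder upload like this one is typically produced with the huggingface_hub client. A minimal sketch, assuming huggingface_hub is installed and you are authenticated; the repo id is a hypothetical placeholder:

from huggingface_hub import HfApi

api = HfApi()
# Push every file in the local folder to the Space in a single commit.
api.upload_folder(
    folder_path=".",
    repo_id="michaelginn/karijona-hangman",  # hypothetical repo id -- substitute your own
    repo_type="space",
)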
.ipynb_checkpoints/app-checkpoint.py ADDED
@@ -0,0 +1,159 @@
+# Load our data
+import os
+
+def load_raw_text(corpus_directory: str, file_names=None) -> str:
+    """Loads all the text files in a directory into one large string"""
+    corpus = ""
+
+    for file_name in os.listdir(corpus_directory):
+        # Skip subdirectories
+        file_path = os.path.join(corpus_directory, file_name)
+        if os.path.isdir(file_path):
+            continue
+
+        # Make sure we only read text files
+        if not file_name.endswith(".txt"):
+            continue
+
+        # Read the file as a string
+        with open(file_path, 'r') as file:
+            file_contents = file.read()
+            corpus += (file_contents + "\n")
+    return corpus
+
+# REPLACE WITH YOUR CORPUS DIRECTORY
+corpus = load_raw_text(corpus_directory="./corpus")
+
+import re
+import util
+
+# TODO: Strip accents using util.strip_accents
+
+# Make corpus lowercase
+corpus = corpus.lower()
+
+# Split the corpus into tokens; the character class keeps the Karijona letters ï, ë, ñ
+word_regex = r"[a-zïëñ]+"
+def tokenize(text: str):
+    return re.findall(word_regex, text)
+s_tok = tokenize(corpus)
+
+# Create a set named "lexicon" with all of the unique words
+lexicon = set()
+for word in s_tok:
+    lexicon.add(word)
+
+# Keep only words of 3 to 7 letters for the game
+filtered_lexicon = set()
+for word in lexicon:
+    if 3 <= len(word) <= 7:
+        filtered_lexicon.add(word)
+
+import random
+
+def random_scramble(lexicon: set):
+    lexicon = list(lexicon)
+
+    word = random.choice(lexicon)
+
+    # Turn the word into a list of characters
+    word_chars = list(word)
+
+    # Shuffle those characters
+    random.shuffle(word_chars)
+
+    # Re-join the characters into a string
+    shuffled = ''.join(word_chars)
+
+    return {'shuffled': shuffled, 'original': word}
+
+import gradio as gr
+
+def create_hangman_clue(word, guessed_letters):
+    """
+    Given a word and a set of guessed letters, create the correct clue.
+
+    For instance, if the word is 'apple' and the guessed letters are 'a' and 'l', the clue should be 'a _ _ l _'
+    """
+    clue = ''
+    for letter in word:
+        if letter in guessed_letters:
+            clue += letter + ' '
+        else:
+            clue += '_ '
+    return clue
+
+
+def pick_new_word(lexicon):
+    lexicon = list(lexicon)
+
+    return {
+        'word': random.choice(lexicon),
+        'guessed_letters': set(),
+        'remaining_chances': 6
+    }
+
+
+def hangman_game(current_state, guess):
+    """Update the current state based on the guess."""
+    # A guess must be a single letter that hasn't been tried yet
+    if guess in current_state['guessed_letters'] or len(guess) != 1:
+        # Illegal guess, do nothing
+        return (current_state, 'Invalid guess')
+
+    current_state['guessed_letters'].add(guess)
+
+    if guess not in current_state['word']:
+        # Wrong guess
+        current_state['remaining_chances'] -= 1
+
+        if current_state['remaining_chances'] == 0:
+            # No more chances! New word
+            current_state = pick_new_word(filtered_lexicon)
+            return (current_state, 'You lose!')
+        else:
+            return (current_state, 'Wrong guess :(')
+
+    else:
+        # Right guess; check whether any letters are left
+        for letter in current_state['word']:
+            if letter not in current_state['guessed_letters']:
+                # Still letters remaining
+                return (current_state, 'Correct guess!')
+
+        # If we made it here, there are no letters left.
+        current_state = pick_new_word(filtered_lexicon)
+        return (current_state, 'You win!')
+
+
+def state_changed(current_state):
+    clue = create_hangman_clue(current_state['word'], current_state['guessed_letters'])
+    guessed_letters = current_state['guessed_letters']
+    remaining_chances = current_state['remaining_chances']
+    return (clue, guessed_letters, remaining_chances)
+
+
+with gr.Blocks(theme=gr.themes.Soft(), title="Karijona Hangman") as hangman:
+    current_word = gr.State(pick_new_word(filtered_lexicon))
+
+    gr.Markdown("# Karijona Hangman")
+
+    with gr.Row():
+        current_word_textbox = gr.Textbox(label="Clue", interactive=False, value=create_hangman_clue(current_word.value['word'], current_word.value['guessed_letters']))
+        guessed_letters_textbox = gr.Textbox(label="Guessed letters", interactive=False)
+        remaining_chances_textbox = gr.Textbox(label="Remaining chances", interactive=False, value=6)
+
+    guess_textbox = gr.Textbox(label="Guess")
+    guess_button = gr.Button(value="Submit")
+
+    output_textbox = gr.Textbox(label="Result", interactive=False)
+
+    guess_button.click(fn=hangman_game, inputs=[current_word, guess_textbox], outputs=[current_word, output_textbox])\
+        .then(fn=state_changed, inputs=[current_word], outputs=[current_word_textbox, guessed_letters_textbox, remaining_chances_textbox])
+
+hangman.launch()
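One note on the unimplemented strip-accents TODO above: leaving it blank looks deliberate, because the tokenizer's character class [a-zïëñ]+ depends on the Karijona diacritics that util.strip_accents would remove. A minimal self-contained check (the sample word comes from the corpus in this commit):

import unicodedata

def strip_accents(text: str) -> str:
    # Same NFD-based helper as in util.py
    return ''.join(c for c in unicodedata.normalize('NFD', text)
                   if unicodedata.category(c) != 'Mn')

print(strip_accents("ëkëimë"))  # prints 'ekeime' -- the ë that word_regex expects is gone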
.ipynb_checkpoints/util-checkpoint.py ADDED
@@ -0,0 +1,69 @@
+import os
+import re
+import unicodedata
+
+def strip_accents(text: str) -> str:
+    """Removes accents from text."""
+    return ''.join(c for c in unicodedata.normalize('NFD', text)
+                   if unicodedata.category(c) != 'Mn')
+
+
+def load_raw_text(corpus_directory: str, file_names=None) -> str:
+    """Loads all the text files in a directory into one large string"""
+    corpus = ""
+
+    for file_name in os.listdir(corpus_directory):
+        # Skip subdirectories
+        file_path = os.path.join(corpus_directory, file_name)
+        if os.path.isdir(file_path):
+            continue
+
+        # Make sure we only read text files
+        if not file_name.endswith(".txt"):
+            continue
+
+        # Read the file as a string
+        with open(file_path, 'r') as file:
+            file_contents = file.read()
+            corpus += (file_contents + "\n")
+    return corpus
+
+
+# Note: inside a character class every symbol is literal, so the original
+# r"[\w|\']+" also matched "|"; this class keeps word characters and apostrophes.
+word_regex = r"[\w']+"
+def tokenize(text):
+    return re.findall(word_regex, text)
+
+
+def preprocess(text):
+    """Tokenizes and processes text which is already separated by spaces into words. Designed for English punctuation."""
+    text = strip_accents(text)
+    text = text.lower()
+
+    tokens = text.split(" ")
+
+    tokens_filtered = []
+    for token in tokens:
+        # Keep only tokens made of word characters, apostrophes, or sentence punctuation
+        if re.match(r"[\w']+|[.,?!]", token):
+            tokens_filtered.append(token)
+    return tokens_filtered
+
+
+def pad(text: list, num_padding: int):
+    """Pads the given text, as a list of strings, with <s> tokens between sentences."""
+    padded_text = []
+
+    # Add initial padding to the first sentence
+    for _ in range(num_padding):
+        padded_text.append("<s>")
+
+    for word in text:
+        padded_text.append(word)
+
+        # Every time we see an end punctuation mark, add <s> tokens after it
+        # REPLACE IF YOUR LANGUAGE USES DIFFERENT END PUNCTUATION
+        if word in [".", "?", "!"]:
+            for _ in range(num_padding):
+                padded_text.append("<s>")
+
+    return padded_text
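To make pad's behavior concrete, here is a small usage sketch, assuming this module is importable as util. Note that pad only inserts <s> when punctuation arrives as its own token, so the input below is hand-tokenized:

from util import pad

tokens = ["the", "dog", "barked", ".", "the", "cat", "meowed", "."]
print(pad(tokens, num_padding=2))
# ['<s>', '<s>', 'the', 'dog', 'barked', '.', '<s>', '<s>',
#  'the', 'cat', 'meowed', '.', '<s>', '<s>']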
README.md CHANGED
@@ -1,12 +1,6 @@
 ---
-title: Karijona Hangman
-emoji: ⚡
-colorFrom: red
-colorTo: yellow
-sdk: gradio
-sdk_version: 3.35.2
+title: Karijona_Hangman
 app_file: app.py
-pinned: false
+sdk: gradio
+sdk_version: 3.34.0
 ---
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,159 @@
+# Load our data
+import os
+
+def load_raw_text(corpus_directory: str, file_names=None) -> str:
+    """Loads all the text files in a directory into one large string"""
+    corpus = ""
+
+    for file_name in os.listdir(corpus_directory):
+        # Skip subdirectories
+        file_path = os.path.join(corpus_directory, file_name)
+        if os.path.isdir(file_path):
+            continue
+
+        # Make sure we only read text files
+        if not file_name.endswith(".txt"):
+            continue
+
+        # Read the file as a string
+        with open(file_path, 'r') as file:
+            file_contents = file.read()
+            corpus += (file_contents + "\n")
+    return corpus
+
+# REPLACE WITH YOUR CORPUS DIRECTORY
+corpus = load_raw_text(corpus_directory="./corpus")
+
+import re
+import util
+
+# TODO: Strip accents using util.strip_accents
+
+# Make corpus lowercase
+corpus = corpus.lower()
+
+# Split the corpus into tokens; the character class keeps the Karijona letters ï, ë, ñ
+word_regex = r"[a-zïëñ]+"
+def tokenize(text: str):
+    return re.findall(word_regex, text)
+s_tok = tokenize(corpus)
+
+# Create a set named "lexicon" with all of the unique words
+lexicon = set()
+for word in s_tok:
+    lexicon.add(word)
+
+# Keep only words of 3 to 7 letters for the game
+filtered_lexicon = set()
+for word in lexicon:
+    if 3 <= len(word) <= 7:
+        filtered_lexicon.add(word)
+
+import random
+
+def random_scramble(lexicon: set):
+    lexicon = list(lexicon)
+
+    word = random.choice(lexicon)
+
+    # Turn the word into a list of characters
+    word_chars = list(word)
+
+    # Shuffle those characters
+    random.shuffle(word_chars)
+
+    # Re-join the characters into a string
+    shuffled = ''.join(word_chars)
+
+    return {'shuffled': shuffled, 'original': word}
+
+import gradio as gr
+
+def create_hangman_clue(word, guessed_letters):
+    """
+    Given a word and a set of guessed letters, create the correct clue.
+
+    For instance, if the word is 'apple' and the guessed letters are 'a' and 'l', the clue should be 'a _ _ l _'
+    """
+    clue = ''
+    for letter in word:
+        if letter in guessed_letters:
+            clue += letter + ' '
+        else:
+            clue += '_ '
+    return clue
+
+
+def pick_new_word(lexicon):
+    lexicon = list(lexicon)
+
+    return {
+        'word': random.choice(lexicon),
+        'guessed_letters': set(),
+        'remaining_chances': 6
+    }
+
+
+def hangman_game(current_state, guess):
+    """Update the current state based on the guess."""
+    # A guess must be a single letter that hasn't been tried yet
+    if guess in current_state['guessed_letters'] or len(guess) != 1:
+        # Illegal guess, do nothing
+        return (current_state, 'Invalid guess')
+
+    current_state['guessed_letters'].add(guess)
+
+    if guess not in current_state['word']:
+        # Wrong guess
+        current_state['remaining_chances'] -= 1
+
+        if current_state['remaining_chances'] == 0:
+            # No more chances! New word
+            current_state = pick_new_word(filtered_lexicon)
+            return (current_state, 'You lose!')
+        else:
+            return (current_state, 'Wrong guess :(')
+
+    else:
+        # Right guess; check whether any letters are left
+        for letter in current_state['word']:
+            if letter not in current_state['guessed_letters']:
+                # Still letters remaining
+                return (current_state, 'Correct guess!')
+
+        # If we made it here, there are no letters left.
+        current_state = pick_new_word(filtered_lexicon)
+        return (current_state, 'You win!')
+
+
+def state_changed(current_state):
+    clue = create_hangman_clue(current_state['word'], current_state['guessed_letters'])
+    guessed_letters = current_state['guessed_letters']
+    remaining_chances = current_state['remaining_chances']
+    return (clue, guessed_letters, remaining_chances)
+
+
+with gr.Blocks(theme=gr.themes.Soft(), title="Karijona Hangman") as hangman:
+    current_word = gr.State(pick_new_word(filtered_lexicon))
+
+    gr.Markdown("# Karijona Hangman")
+
+    with gr.Row():
+        current_word_textbox = gr.Textbox(label="Clue", interactive=False, value=create_hangman_clue(current_word.value['word'], current_word.value['guessed_letters']))
+        guessed_letters_textbox = gr.Textbox(label="Guessed letters", interactive=False)
+        remaining_chances_textbox = gr.Textbox(label="Remaining chances", interactive=False, value=6)
+
+    guess_textbox = gr.Textbox(label="Guess")
+    guess_button = gr.Button(value="Submit")
+
+    output_textbox = gr.Textbox(label="Result", interactive=False)
+
+    guess_button.click(fn=hangman_game, inputs=[current_word, guess_textbox], outputs=[current_word, output_textbox])\
+        .then(fn=state_changed, inputs=[current_word], outputs=[current_word_textbox, guessed_letters_textbox, remaining_chances_textbox])
+
+hangman.launch()
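A quick sanity check of the clue and game logic in app.py, runnable if the functions above are in scope (e.g., pasted just before the gr.Blocks section). The word 'tamu' appears in the corpus below, and the dict mirrors the shape produced by pick_new_word:

state = {'word': 'tamu', 'guessed_letters': {'t', 'u'}, 'remaining_chances': 6}
print(create_hangman_clue(state['word'], state['guessed_letters']))  # "t _ _ u "
state, message = hangman_game(state, 'a')
print(message)  # "Correct guess!" -- 'a' is in 'tamu' and 'm' is still hidden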
corpus/1.txt ADDED
@@ -0,0 +1,73 @@
+Karijona ekarï
+Karijona ekarï notonaga ëkëimë mono kërïtawënobë. Karijona echiwaketï nïjïnëjïjëbë notonaga karijona ekarï. Irajo jiyachi, kuyugo dïjïrë mëkë karijona ekarïko
+esetï karama, jiyanakoto, yakauyana, werewereru, majotoyana, kaikusana, machijuyana, saja saja, namororeke nai ërarërë nesejoyanë marë. Irebë nïwowanë toto.
+— Ëteke manai?
+— Kure wae (ganë)?
+— Ëtï mïjëkae?
+— Wui tïrïyae tujitëjo
+— Wïtëeja.
+dëmë nesejoyanë marë, marë, marë. Irëbë niwowanë toto:
+— Ëteke manatëi?
+— Aña kuregïñake nai.
+Ëwï yesetï nai . Yiye esetï nai Ëwï yumu esetï nai . Yakëmijë esetï gërëja nai Yinotï esetï nai Tamu esetï nai . Tïmugëake wae. Mësa mëitïto.
+Karijona eremirï etase manai?
+2. Ëjutujë eurukuse manai?
+3. Yeremirï tae ëtëkëne tïyajoro. 4. Tïyajoro ejaragae echinemae. 5. Tïyajoro etunutëkë.
+6. Nekë nechinemanë mëitïto.
+Aime toto nai tïyajoro.
+26. Itu tawëdoko aime nai.
+27. Tetunutë ake.
+28. Tejaragae tïyajoro ekayakarï.
+29. Akorodoko ejaragasegërë nai.
+Mësa mëitïto nai.
+
+Ësanobë meyae?
+Ësanobë wërichi neyae? Ësanobë mure neyanë? Ësanobë gïrï neyanë?
+Aña tujitë tërënobë neyae. Ësanobë añamoro meyatëi? Ësanobë mëkamoro neyanë?
+yitudae
+mutudae
+nutudae
+nutudae
+mutudatëi
+nutudanë
+Ëti jëkë manai? Ëti jëkë nërë nai? Ëti jëkë manatëi?
+ëwï
+ëmërë
+nërë
+aña
+añamoro
+mëkamoro
+itawarï
+tujitë
+mïnë
+Sëkënërë atakëmicha dëmë terejarïko wae: teñi jaru jëmïrï, serawërë nureimë, kënëkërëne jëmëi. Mëkë iwajotorï dëmë ikucha wae. Nërë nejï. Ëñaotoene ikucha: sëkënërë jëne, serawërë kunañi. Nai gërëja oworï iyatënë.
+Teñi Sëkënërë Serawërë Kënëtëkërëne Ëñatoene
+Ëwï ajereme teñi kaikuchi yeku nai. Ëwï ajereme sëkënërë nureimë nai. Ëwï ajereme serawërë kunañi nai. Ëwï ajereme kënëtëkërëne jëmëi nai. Ëwï ajereme eñatoene jaru
+Ëteke manatëi? Manatëi reke?
+Kure
+Kure dïjïrë
+Kure aña nai
+Uwareke
+Uwa
+Mëjënae?
+Mëjënuyae
+Ikucha mïsakae?
+Meremiruyae?
+Wëiko
+Jiji
+Enuko Enu
+Jiji
+Wërï
+Muchu
+Tamu
+Wëiko Weikorï
+Jiji
+Akëmijë
+Yakëmijë
+Iyarijarï
+Yarijarï
+Wajotorï
+Yiwajotorï
+Echirï Yechirï
+Muguru
+
util.py ADDED
@@ -0,0 +1,69 @@
+import os
+import re
+import unicodedata
+
+def strip_accents(text: str) -> str:
+    """Removes accents from text."""
+    return ''.join(c for c in unicodedata.normalize('NFD', text)
+                   if unicodedata.category(c) != 'Mn')
+
+
+def load_raw_text(corpus_directory: str, file_names=None) -> str:
+    """Loads all the text files in a directory into one large string"""
+    corpus = ""
+
+    for file_name in os.listdir(corpus_directory):
+        # Skip subdirectories
+        file_path = os.path.join(corpus_directory, file_name)
+        if os.path.isdir(file_path):
+            continue
+
+        # Make sure we only read text files
+        if not file_name.endswith(".txt"):
+            continue
+
+        # Read the file as a string
+        with open(file_path, 'r') as file:
+            file_contents = file.read()
+            corpus += (file_contents + "\n")
+    return corpus
+
+
+# Note: inside a character class every symbol is literal, so the original
+# r"[\w|\']+" also matched "|"; this class keeps word characters and apostrophes.
+word_regex = r"[\w']+"
+def tokenize(text):
+    return re.findall(word_regex, text)
+
+
+def preprocess(text):
+    """Tokenizes and processes text which is already separated by spaces into words. Designed for English punctuation."""
+    text = strip_accents(text)
+    text = text.lower()
+
+    tokens = text.split(" ")
+
+    tokens_filtered = []
+    for token in tokens:
+        # Keep only tokens made of word characters, apostrophes, or sentence punctuation
+        if re.match(r"[\w']+|[.,?!]", token):
+            tokens_filtered.append(token)
+    return tokens_filtered
+
+
+def pad(text: list, num_padding: int):
+    """Pads the given text, as a list of strings, with <s> tokens between sentences."""
+    padded_text = []
+
+    # Add initial padding to the first sentence
+    for _ in range(num_padding):
+        padded_text.append("<s>")
+
+    for word in text:
+        padded_text.append(word)
+
+        # Every time we see an end punctuation mark, add <s> tokens after it
+        # REPLACE IF YOUR LANGUAGE USES DIFFERENT END PUNCTUATION
+        if word in [".", "?", "!"]:
+            for _ in range(num_padding):
+                padded_text.append("<s>")
+
+    return padded_text
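Finally, a short demonstration of the tokenizer fix noted in util.py (inside a character class, | is a literal, so the original r"[\w|\']+" also produced '|' tokens):

import re

print(re.findall(r"[\w']+", "don't stop | now"))   # ["don't", 'stop', 'now']
print(re.findall(r"[\w|']+", "don't stop | now"))  # ["don't", 'stop', '|', 'now'] -- the old behavior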