firobeid committed · verified · Commit 6ee39c5 · Parent(s): 3e20de5

Update README.md

Files changed (1): README.md (+116 -116)
---
tags:
- text-generation
- lstm
- tensorflow
library_name: tensorflow
pipeline_tag: text-generation
---

# LSTM Text Generation Model

This model was trained with TensorFlow/Keras to generate financial news headlines.

## Model Details

- **Model Type**: LSTM
- **Framework**: TensorFlow/Keras
- **Task**: Text Generation
- **Vocabulary Size**: 30,000
- **Architecture**: Bi-directional Long Short-Term Memory (LSTM)

## Usage

```python
from huggingface_hub import snapshot_download
import tensorflow as tf
import pickle
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Download model files
model_path = snapshot_download(repo_id="firobeid/L4_LSTM_financial_News_Headlines_generator")

# Load the LSTM model
model = tf.keras.models.load_model(f"{model_path}/lstm_model")

# Load tokenizer
try:
    # Try JSON format first
    with open(f"{model_path}/tokenizer.json", 'r', encoding='utf-8') as f:
        tokenizer_json = f.read()
    tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(tokenizer_json)
except FileNotFoundError:
    # Fall back to pickle format
    with open(f"{model_path}/tokenizer.pkl", 'rb') as f:
        tokenizer = pickle.load(f)

# Text generation functions
def preprocess(texts, max_sequence_length=71):
    # Prepend the <s> start tag, lowercase, and pre-pad to the training length
    texts = '<s> {}'.format(texts.lower())
    X = np.array(tokenizer.texts_to_sequences([texts]))
    pad_encoded = pad_sequences(X,
                                maxlen=max_sequence_length,
                                padding='pre')
    return pad_encoded

def next_word(model, tokenizer,
              text, num_gen_words=1,
              random_sampling=False,
              temperature=1):
    '''
    random_sampling: sample the next word from a categorical distribution
    over the model's predicted probabilities instead of taking the argmax.
    Low temperatures result in more predictable text; higher temperatures
    result in more surprising text. Experiment to find the best setting.
    '''
    input_text = text
    output_text = [input_text]

    for i in range(num_gen_words):
        X_new = preprocess(input_text)

        if random_sampling:
            y_proba = model.predict(X_new, verbose=0)[0, -1:, :]  # first sentence, last token
            rescaled_logits = tf.math.log(y_proba) / temperature
            pred_word_ind = tf.random.categorical(rescaled_logits, num_samples=1)
            pred_word = tokenizer.sequences_to_texts(pred_word_ind.numpy())[0]
        else:
            y_proba = model.predict(X_new, verbose=0)[0]  # first sentence
            pred_word_ind = np.argmax(y_proba, axis=-1)
            pred_word = tokenizer.index_word[pred_word_ind[-1]]

        input_text += ' ' + pred_word
        output_text.append(pred_word)

        if pred_word == '</s>':  # stop early at the end-of-sequence tag
            return ' '.join(output_text)

    return ' '.join(output_text)

def generate_text(model, tokenizer, text, num_gen_words=25, temperature=1, random_sampling=False):
    return next_word(model, tokenizer, text, num_gen_words, random_sampling, temperature)

# Example usage
# preprocess() adds the <s> start tag and lowercases the prompt for you
generate_text(model,
              tokenizer,
              "Apple",
              num_gen_words=10,
              random_sampling=True,
              temperature=10)
```
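
Once the snippet above has run, the saved layer stack and tokenizer settings can be confirmed directly; this is a quick sanity check rather than part of the generation API:

```python
# Sanity-check the loaded artifacts (run after the Usage snippet above)
model.summary()  # prints the saved layer stack

# num_words is the vocabulary cap (30,000 per this card); word_index may
# hold more entries than the model actually uses
print(tokenizer.num_words, len(tokenizer.word_index))
```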
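
Temperature only has an effect when `random_sampling=True`, where it rescales the log-probabilities before sampling. A small sweep over illustrative values (not tuned recommendations) makes the difference visible:

```python
# Compare sampling temperatures on the same prompt (illustrative values only)
for temp in [0.5, 1.0, 5.0]:
    headline = generate_text(model, tokenizer, "Apple",
                             num_gen_words=10,
                             random_sampling=True,
                             temperature=temp)
    print(f"T={temp}: {headline}")
```

Lower temperatures concentrate probability mass on the most likely words; higher ones flatten the distribution toward uniform sampling.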

## Training

This model was trained on financial news headlines using a bi-directional LSTM for next-word prediction.
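
The exact corpus and hyperparameters are not published in this card. As a rough sketch only, a model matching the interface above (per-timestep softmax over a 30,000-word vocabulary, `<s>`/`</s>` tags, pre-padding to length 71) could be assembled along these lines; the corpus, layer sizes, and epoch count below are placeholder assumptions, not the actual training setup:

```python
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Placeholder corpus; the real model was trained on financial news headlines
corpus = ["<s> apple shares rise after earnings beat </s>",
          "<s> fed holds rates steady amid inflation fears </s>"]

# Keep < and > out of the filters so the <s> and </s> tags survive
tokenizer = Tokenizer(num_words=30000,
                      filters='!"#$%&()*+,-./:;=?@[\\]^_`{|}~\t\n')
tokenizer.fit_on_texts(corpus)
sequences = tokenizer.texts_to_sequences(corpus)

max_len = 71
padded = pad_sequences(sequences, maxlen=max_len + 1, padding='pre')
X, y = padded[:, :-1], padded[:, 1:]  # target is the input shifted by one word

vocab_size = 30000
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, 128),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128, return_sequences=True)),
    tf.keras.layers.Dense(vocab_size, activation='softmax'),  # one distribution per timestep
])
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam')
model.fit(X, y, epochs=1)  # toy run; real training needs a large corpus
```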

## Limitations

- Model performance depends on the quality and size of the training data
- Generated text may not always be coherent over longer sequences
- Generation is restricted to the 30,000-word vocabulary the model was trained on (see the check below)
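
Because generation is restricted to the training vocabulary, prompts containing unseen words can degrade output; a quick check against the tokenizer loaded in Usage catches this up front:

```python
# Flag prompt words the tokenizer has never seen (run after the Usage snippet)
def oov_words(prompt, tokenizer):
    # Note: word_index may include rare words beyond the model's 30,000-word cap
    return [w for w in prompt.lower().split() if w not in tokenizer.word_index]

print(oov_words("Apple acquires fintech startup", tokenizer))  # [] means every word is known
```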