Change argument processing to allow prompt or file args. #103


Merged: 3 commits, Mar 21, 2023
chat.cpp: 23 additions & 20 deletions
@@ -318,7 +318,7 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab
fin.close();

std::vector<uint8_t> tmp;

for (int i = 0; i < n_parts; ++i) {
const int part_id = i;
//const int part_id = n_parts - i - 1;
@@ -797,14 +797,6 @@ int main(int argc, char ** argv) {

gpt_params params;

params.temp = 0.1f;
params.top_p = 0.95f;
params.n_ctx = 2048;
params.interactive = true;
params.interactive_start = true;
params.use_color = true;
params.model = "ggml-alpaca-7b-q4.bin";

if (gpt_params_parse(argc, argv, params) == false) {
return 1;
}
@@ -856,13 +848,26 @@ int main(int argc, char ** argv) {
// Add a space in front of the first character to match OG llama tokenizer behavior
// params.prompt.insert(0, 1, ' ');
// tokenize the prompt
std::vector<gpt_vocab::id> embd_inp;// = ::llama_tokenize(vocab, params.prompt, true);
std::vector<gpt_vocab::id> embd_inp;

// params.n_predict = std::min(params.n_predict, model.hparams.n_ctx - (int) embd_inp.size());

// // tokenize the reverse prompt
// std::vector<gpt_vocab::id> antiprompt_inp = ::llama_tokenize(vocab, params.antiprompt, false);


std::vector<gpt_vocab::id> instruct_inp = ::llama_tokenize(vocab, " Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n", true);
std::vector<gpt_vocab::id> prompt_inp = ::llama_tokenize(vocab, "### Instruction:\n\n", true);
std::vector<gpt_vocab::id> response_inp = ::llama_tokenize(vocab, "### Response:\n\n", false);
embd_inp.insert(embd_inp.end(), instruct_inp.begin(), instruct_inp.end());

if(!params.prompt.empty()) {
std::vector<gpt_vocab::id> param_inp = ::llama_tokenize(vocab, params.prompt, true);
embd_inp.insert(embd_inp.end(), prompt_inp.begin(), prompt_inp.end());
embd_inp.insert(embd_inp.end(), param_inp.begin(), param_inp.end());
embd_inp.insert(embd_inp.end(), response_inp.begin(), response_inp.end());
}

// fprintf(stderr, "\n");
// fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str());
// fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
@@ -871,13 +876,6 @@ int main(int argc, char ** argv) {
// }
// fprintf(stderr, "\n");

std::vector<gpt_vocab::id> instruct_inp = ::llama_tokenize(vocab, " Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n", true);
std::vector<gpt_vocab::id> prompt_inp = ::llama_tokenize(vocab, "### Instruction:\n\n", true);
std::vector<gpt_vocab::id> response_inp = ::llama_tokenize(vocab, "### Response:\n\n", false);

embd_inp.insert(embd_inp.end(), instruct_inp.begin(), instruct_inp.end());


if (params.interactive) {
#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
struct sigaction sigint_action;
@@ -1076,9 +1074,14 @@ int main(int argc, char ** argv) {

// end of text token
if (embd.back() == 2) {
// fprintf(stderr, " [end of text]\n");
is_interacting = true;
continue;
if (params.interactive) {
is_interacting = true;
continue;
} else {
printf("\n");
fprintf(stderr, " [end of text]\n");
break;
}
}
}

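The main() changes above fold a user-supplied prompt into the Alpaca instruction template instead of always starting an empty interactive session. Below is a minimal, self-contained sketch of that assembly pattern; toy_tokenize and plain int token ids are stand-ins for ::llama_tokenize and gpt_vocab::id, not the project's real API.

// Sketch of the prompt-assembly pattern introduced in this PR (stand-in types).
#include <cstdio>
#include <string>
#include <vector>

static std::vector<int> toy_tokenize(const std::string & text) {
    // Stand-in tokenizer: one "token" per character.
    return std::vector<int>(text.begin(), text.end());
}

int main() {
    const std::string user_prompt = "Write a haiku about llamas.";  // as if passed via -p or -f

    std::vector<int> embd_inp;
    const std::vector<int> instruct_inp = toy_tokenize(" Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n");
    const std::vector<int> prompt_inp   = toy_tokenize("### Instruction:\n\n");
    const std::vector<int> response_inp = toy_tokenize("### Response:\n\n");

    // The instruction preamble is always fed to the model.
    embd_inp.insert(embd_inp.end(), instruct_inp.begin(), instruct_inp.end());

    // The Instruction/Response wrapper is only appended when a prompt was
    // supplied; otherwise the chat loop falls back to interactive input.
    if (!user_prompt.empty()) {
        const std::vector<int> param_inp = toy_tokenize(user_prompt);
        embd_inp.insert(embd_inp.end(), prompt_inp.begin(), prompt_inp.end());
        embd_inp.insert(embd_inp.end(), param_inp.begin(), param_inp.end());
        embd_inp.insert(embd_inp.end(), response_inp.begin(), response_inp.end());
    }

    std::printf("assembled %zu tokens\n", embd_inp.size());
    return 0;
}

The end-of-text handling in the same file follows the same split: in interactive mode the token hands control back to the user, while in one-shot mode it prints "[end of text]" and breaks out of the generation loop.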
utils.cpp: 8 additions & 0 deletions
@@ -24,9 +24,17 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
} else if (arg == "-t" || arg == "--threads") {
params.n_threads = std::stoi(argv[++i]);
} else if (arg == "-p" || arg == "--prompt") {
params.interactive = false;
params.interactive_start = false;
params.use_color = false;

params.prompt = argv[++i];
} else if (arg == "-f" || arg == "--file") {

params.interactive = false;
params.interactive_start = false;
params.use_color = false;

std::ifstream file(argv[++i]);

std::copy(std::istreambuf_iterator<char>(file),
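The utils.cpp hunk is cut off at the std::copy call. A common way to complete that pattern, shown here only as an assumption about the intent rather than the PR's exact code, is to stream the whole file into params.prompt through a back_inserter. read_prompt_file is a hypothetical helper, not a function from this codebase.

// Hedged sketch: read an entire prompt file into a string.
#include <algorithm>
#include <cstdio>
#include <fstream>
#include <iterator>
#include <string>

static bool read_prompt_file(const std::string & path, std::string & prompt) {
    std::ifstream file(path);
    if (!file) {
        return false;  // lets the caller report a bad -f argument
    }
    std::copy(std::istreambuf_iterator<char>(file),
              std::istreambuf_iterator<char>(),
              std::back_inserter(prompt));
    return true;
}

int main() {
    std::string prompt;
    if (read_prompt_file("prompt.txt", prompt)) {
        std::printf("read %zu bytes\n", prompt.size());
    }
    return 0;
}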
utils.h: 7 additions & 6 deletions
@@ -12,28 +12,29 @@
// CLI argument parsing
//

// The default parameters
struct gpt_params {
int32_t seed = -1; // RNG seed
int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
int32_t n_predict = 128; // new tokens to predict
int32_t repeat_last_n = 64; // last n tokens to penalize
int32_t n_ctx = 512; //context size
int32_t n_ctx = 2048; //context size

// sampling parameters
int32_t top_k = 40;
float top_p = 0.95f;
float temp = 0.80f;
float temp = 0.10f;
float repeat_penalty = 1.30f;

int32_t n_batch = 8; // batch size for prompt processing

std::string model = "models/lamma-7B/ggml-model.bin"; // model path
std::string model = "ggml-alpaca-7b-q4.bin"; // model path
std::string prompt;

bool use_color = false; // use color to distinguish generations and inputs
bool use_color = true; // use color to distinguish generations and inputs

bool interactive = false; // interactive mode
bool interactive_start = false; // reverse prompt immediately
bool interactive = true; // interactive mode
bool interactive_start = true; // reverse prompt immediately
std::string antiprompt = ""; // string upon seeing which more user input is prompted
};

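Taken together, the utils.h hunk moves the Alpaca-friendly defaults (2048-token context, temperature 0.10, interactive color chat against ggml-alpaca-7b-q4.bin) out of chat.cpp and into gpt_params, while -p/-f switch interactive mode off in the parser. A tiny sketch of how those pieces are assumed to interact; toy_params and the loop below mirror, but are not, the real gpt_params/gpt_params_parse.

// Assumed interaction of the new defaults with -p: interactive chat unless a prompt is supplied.
#include <cstdint>
#include <cstdio>
#include <string>

struct toy_params {
    float       temp        = 0.10f;
    int32_t     n_ctx       = 2048;
    bool        interactive = true;   // chat by default
    std::string model       = "ggml-alpaca-7b-q4.bin";
    std::string prompt;
};

int main(int argc, char ** argv) {
    toy_params params;
    for (int i = 1; i < argc; ++i) {
        const std::string arg = argv[i];
        if ((arg == "-p" || arg == "--prompt") && i + 1 < argc) {
            params.interactive = false;   // one-shot generation, mirroring the utils.cpp change
            params.prompt      = argv[++i];
        }
    }
    std::printf("interactive=%d prompt='%s'\n",
                params.interactive ? 1 : 0, params.prompt.c_str());
    return 0;
}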