/* ** utilities.c ** ** General functions for the GUI interface ** ** mg - May 1995 ** ** This file contains any function that may be called by ** more than one module. */ #define G_EXTERN extern #include "defs.h" /* ** Length of query term before it needs to be wrapped */ #define QUERY_WINDOW_LENGTH 22 #define ALL_OPS "ABSEGN" static char *months[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" }; int check_through_terms(char *, char); void check_against_rels(int); void find_adj_set(char *, int *, int *); void terminate_run(char *, char *, int); void de_space(char *); G_EXTERN int calc_little_r(int); G_EXTERN double calc_wgt(int, int, char); G_EXTERN double calc_rsv(double, int, char); G_EXTERN void sort_user_rels(struct user_rj *, int, int); char *read_env(char env_variable[]) { int debugging = atoi(getenv("READ_ENV_DEBUG")); char *return_value; char variable_value[SMALL_BUFFER]; return_value = getenv(env_variable); if (return_value == NULL) { fprintf(stderr, "ERROR READING [%s]\n", env_variable); } else { sprintf(variable_value, "%s", getenv(env_variable)); if (debugging) { fprintf(stderr, "read_env() :: %s = [%s]\n", env_variable, variable_value); } } return variable_value; } void set_env(char env_variable[], char env_value[]) { int debugging = atoi(getenv("SET_ENV_DEBUG")); int return_value; char env_command[SMALL_BUFFER]; sprintf(env_command, "%s=%s", env_variable, env_value); if (debugging) { fprintf(stderr, "putenv(%s)\n", env_command); } return_value = putenv(env_command); if (return_value != 0) { fprintf(stderr, "set_env() :: Error Setting [%s]\n", env_command); } } void malloc_error(char *program, char *function, char *buffer, int buffer_size) { /* ** Generates an error message if malloc() fails ** and terminates the program. */ printf("%s:: Failed to allocate [%d] bytes for %s\n", function, buffer_size, buffer); exit(0); } void malloc_success(char *program, char *function, char *buffer, int buffer_size) { printf("%s:: Allocated [%d] bytes for %s\n", function, buffer_size, buffer); } void current_time(char *runtime) { long time; long lasttime; struct timeval tp; struct timezone tzp; gettimeofday(&tp, &tzp); time = tp.tv_sec; ELAPSED_TIME = ELAPSED_TIME + time - LAST_TIME; sprintf(runtime, "%.3d:%.2d", ELAPSED_TIME / 60, ELAPSED_TIME % 60); LAST_TIME = time; } void show_time(char *function, int where) { char elapsed[RUNTIME_LENGTH]; current_time(elapsed); if (where == START) { printf("%15s -- start: ", function); } else { printf("%15s -- finish: ", function); } printf("%s\n", elapsed); } void form_phrase(struct user_input *user_query, int newdb) { /* ** The terms that constitute the phrase are passed to this function ** in a structure of type 'user_input'. ** ** Also passed to the function is a char string not_found_list[] ** (in which is returned the source phrase if it is not found). */ int both_phrase_ops = atoi(getenv("BOTH_PHRASE_OPS")); int return_code; int npostings; int existing_term_no = NONE_ASSIGNED; int set; int adj_set = NONE_ASSIGNED; int adj_np; int adj_r; double adj_wgt; int sas_set = NONE_ASSIGNED; int sas_np; int sas_r; double sas_wgt; int sas_not_adj_set = NONE_ASSIGNED; int sas_not_adj_np; int sas_not_adj_r; double sas_not_adj_wgt; int temp_sets[MAX_PHRASE_LENGTH]; int temporary; int t; int op_case; char bss_command[SMALL_BUFFER]; char final_command[SMALL_BUFFER]; char adj_command[SMALL_BUFFER]; char bss_result[SMALL_BUFFER]; char *rbuf; /* ** First initialise the temp_sets[] array. */ for (temporary = 0; temporary < user_query->no_terms; temporary++) { temp_sets[temporary] = NONE_ASSIGNED; } user_query->parsed_phrase[0] = '\0'; for (t = 0; t < user_query->no_terms; t++) { /* ** First build up parsed phrase to store in (termset) (the source ** phrase is already stored in 'user_query->source_terms'. ** ** GSL terms of type F, H are discarded. ** ** STOP_TERMS = "FH" ** ** gsl_class == 'I' kept so that a phrase like 'human rights' ** will be not be rejected. */ if (strchr(STOP_TERMS, user_query->gsl_class[t]) == NULL) { if (t == 0) { sprintf(user_query->parsed_phrase, "%s", user_query->parsed[t]); } else { sprintf(user_query->parsed_phrase + strlen(user_query->parsed_phrase), " %s", user_query->parsed[t]); } } } if (!newdb) { existing_term_no = check_through_terms(user_query->parsed_phrase, USER_TERM); } else { existing_term_no == NONE_ASSIGNED; } if (existing_term_no == NONE_ASSIGNED) { /* ** Now 'find' the phrase ** ** Build final command (w/o adjacency operator). Only ** include terms where np > 0. */ sprintf(final_command, "find"); user_query->phrase_np = 0; for (t = 0; t < user_query->no_terms; t++) { if (strchr(STOP_TERMS, user_query->gsl_class[t]) == NULL) { sprintf(bss_command, "f t=%s", user_query->parsed[t]); if ((return_code = i0(bss_command, bss_result)) != 0) { terminate_run("form_phrase()", bss_command, return_code); } rbuf = bss_result; sscanf(rbuf, "S%d np=%d", &temp_sets[t], &npostings); if (npostings > 0) { sprintf(final_command + strlen(final_command), " s=%d", temp_sets[t]); } else { user_query->phrase_np = NONE_ASSIGNED; } } } /* ** Must find two sets: (a) adj, (b) sames ** ** First do 'adj' (to include intervening stopwords). ** ** user_query->phrase_np defaults to zero (to start with) ** It will have been set to NONE_ASSIGNED if any term ** is not found. */ if (user_query->phrase_np == 0) { /* ** All terms exist ** ** First find adj_set */ sprintf(adj_command, "%s op=%s", final_command, BSS_ADJ); find_adj_set(adj_command, &adj_set, &adj_np); /* ** If no adj set, adj_np = 0 */ if (adj_np == 0) { /* ** No members of adj set; delete adj_set. */ sprintf(bss_command, "delete %d", adj_set); if ((return_code = i0(bss_command, bss_result)) != 0) { terminate_run("form_phrase()", bss_command, return_code); } } else { /* ** Calculate little_r and weight */ adj_r = calc_little_r(adj_set); adj_wgt = calc_wgt(adj_np, adj_r, USER_TERM); } /* ** Now do 'sames' if both_phrase_ops */ if (both_phrase_ops) { sprintf(adj_command, "%s op=%s", final_command, BSS_SAMES); find_adj_set(adj_command, &sas_set, &sas_np); /* ** If no sames set, sas_np = 0 */ if (sas_np == 0) { /* ** No members of the sames set; delete sas_set. */ sprintf(bss_command, "delete %d", sas_set); if ((return_code = i0(bss_command, bss_result)) != 0) { terminate_run("form_phrase()", bss_command, return_code); } } else { /* ** Calculate little_r and weight */ sas_r = calc_little_r(sas_set); sas_wgt = calc_wgt(sas_np, sas_r, USER_TERM); } } else { sas_np = adj_np; } } /* ** Now set 'op_case' to: ** ** PHRASE_TYPE1 -- n(s) == n(a), n(a) == 0, NO_PHRASE ** or any individual term not found. ** PHRASE_TYPE2 -- n(s) == n(a), n(a) > 0, OP_ADJ ** PHRASE_TYPE3 -- n(s) > n(a), n(a) == 0, OP_SAMES ** PHRASE_TYPE4 -- n(s) > n(a), n(a) > 0, OP_BOTH */ if (user_query->phrase_np == NONE_ASSIGNED) { op_case = PHRASE_TYPE1; user_query->operation = NO_PHRASE; } else if (sas_np == adj_np) { if (adj_np == 0) { op_case = PHRASE_TYPE1; user_query->operation = NO_PHRASE; } else { op_case = PHRASE_TYPE2; user_query->operation = OP_ADJ; } } else { if (adj_np == 0) { op_case = PHRASE_TYPE3; user_query->operation = OP_SAMES; } else { op_case = PHRASE_TYPE4; user_query->operation = OP_BOTH; } } /* ** Now determine set dependent upon 'op_case' */ switch (op_case) { case PHRASE_TYPE1 : /* ** NO_PHRASE -- Either one or more terms do not exist or ** all individual terms exist but neither an ADJ ** nor a SAMES set can be formed. */ user_query->phrase_set = NONE_ASSIGNED; user_query->phrase_wgt = 0.0; user_query->phrase_rsv = 0.0; user_query->phrase_np = 0; user_query->phrase_r = 0; break; case PHRASE_TYPE2 : /* ** Identical non-zero sets - use ADJ set only */ user_query->phrase_set = adj_set; user_query->phrase_np = adj_np; user_query->phrase_r = adj_r; user_query->phrase_wgt = adj_wgt; user_query->phrase_rsv = calc_rsv(adj_wgt, adj_r, USER_TERM); break; case PHRASE_TYPE3 : /* ** OP_SAMES -- n(S) > 0, n(A) = 0 */ user_query->phrase_set = sas_set; user_query->phrase_np = sas_np; user_query->phrase_r = sas_r; user_query->phrase_wgt = sas_wgt; user_query->phrase_rsv = calc_rsv(sas_wgt, sas_r, USER_TERM); break; case PHRASE_TYPE4 : /* ** OP_BOTH -- sas_np > adj_np, adj_np > 0 ** ** Form set s(N) = s(S) NOT s(A). ** Record np for this set, but use wgt(S). ** ** Then form single set a: ** ** f s=s(N) w=w(S) s=s(A) w=w(A) op=DEFAULT_OP */ sprintf(bss_command, "f s=%d s=%d op=not", sas_set, adj_set); if ((return_code = i0(bss_command, bss_result)) != 0) { terminate_run("form_phrase()", bss_command, return_code); } rbuf = bss_result; sscanf(rbuf, "S%d np=%d", &sas_not_adj_set, &sas_not_adj_np); sprintf(bss_command, "find s=%d w=%f s=%d w=%f op=%s", adj_set, adj_wgt, sas_not_adj_set, sas_wgt, getenv("DEFAULT_OP")); if ((return_code = i0(bss_command, bss_result)) != 0) { terminate_run("form_phrase()", bss_command, return_code); } rbuf = bss_result; sscanf(rbuf, "S%d np=%d", &set, &npostings); user_query->phrase_set = set; user_query->phrase_np = sas_np; user_query->phrase_r = calc_little_r(set); user_query->phrase_wgt = calc_wgt(sas_np, user_query->phrase_r, USER_TERM); user_query->phrase_rsv = calc_rsv(user_query->phrase_wgt, user_query->phrase_r, USER_TERM); /* ** Now delete the 'adj', 'sames' and 'sas_not_adj' sets. */ if (adj_set != NONE_ASSIGNED) { sprintf(bss_command, "delete %d", adj_set); if ((return_code = i0(bss_command, bss_result)) != 0) { terminate_run("form_phrase()", bss_command, return_code); } } if (sas_set != NONE_ASSIGNED) { sprintf(bss_command, "delete %d", sas_set); if ((return_code = i0(bss_command, bss_result)) != 0) { terminate_run("form_phrase()", bss_command, return_code); } } if (sas_not_adj_set != NONE_ASSIGNED) { sprintf(bss_command, "delete %d", sas_not_adj_set); if ((return_code = i0(bss_command, bss_result)) != 0) { terminate_run("form_phrase()", bss_command, return_code); } } break; } } /* ** Now delete all temporary sets. */ for (t = 0; t < user_query->no_terms; t++) { if (temp_sets[t] != NONE_ASSIGNED) { sprintf(bss_command, "delete %d", temp_sets[t]); if ((return_code = i0(bss_command, bss_result)) != 0) { terminate_run("form_phrase()", bss_command, return_code); } temp_sets[t] = NONE_ASSIGNED; } } } void find_adj_set(char adj_command[], int *adj_set, int *adj_np) { /* ** function to form an adjacency set, either 'adj' or 'sames' */ int return_code; char bss_command[SMALL_BUFFER]; char bss_result[SMALL_BUFFER]; char *rbuf; if (i0(adj_command, bss_result) < 0) { terminate_run("find_adj_set()", adj_command, return_code); } rbuf = bss_result; sscanf(rbuf, "S%d np=%d", adj_set, adj_np); if ((*adj_np) == 0) { /* ** No adjacency; delete set */ sprintf(bss_command, "delete %d", *adj_set); if ((return_code = i0(bss_command, bss_result)) != 0) { terminate_run("find_adj_set()", bss_command, return_code); } *adj_set = NONE_ASSIGNED; } } int old_split_up(char termset_entry[], struct query_terms *next_term, int i) { /* ** String is of the form: ** ** :::: */ int debugging = atoi(getenv("SPLIT_UP_DEBUG")); int ch; int colon_count = 0; int valid; int length; int chars_read; int rj; char *tbuf; char source[MAX_TERM_LENGTH]; /* ** First validate termset_entry by testing for colon characters. ** There should be 12 + BIGR in a valid string; */ if (debugging) { fprintf(stderr, "old_split_up:: [%s]\n", termset_entry); } for (ch = 0; ch < strlen(termset_entry); ch++) { if (termset_entry[ch] == ':') { colon_count++; } } valid = (colon_count == (BIGR + 10)); if (valid) { tbuf = termset_entry; sscanf(tbuf, "%d:%d:%d:%d:%lf:%lf:%c:%c:%n", &(next_term + i)->term_no, &(next_term + i)->bss_set, &(next_term + i)->np, &(next_term + i)->little_r, &(next_term + i)->weight, &(next_term + i)->rsv, &(next_term + i)->term_type, &(next_term + i)->attribute, &chars_read); tbuf += chars_read; length = strchr(tbuf, ':') - tbuf; memcpy(source, tbuf, length); source[length] = '\0'; de_space(source); sprintf((next_term + i)->source, "%s", source); tbuf += length + 1; length = strchr(tbuf, ':') - tbuf; memcpy((next_term + i)->parsed, tbuf, length); (next_term + i)->parsed[length] = '\0'; tbuf += length + 1; sscanf(tbuf, "%c%n", &(next_term + i)->operation, &chars_read); tbuf += chars_read; /* ** Now read relj info from end of record */ for (rj = 0; rj < BIGR; rj++) { sscanf(tbuf, ":%d%n", &(next_term + i)->relj[rj], &chars_read); tbuf += chars_read; } } return valid; } int split_up(char termset_entry[], struct query_terms *next_term, int i) { /* ** String is of the form: ** ** :::::: ** ::::: ** :: ... : ** ** Re-written Feb 1997 to include validation so that ** application does not crash. */ int valid = TRUE; int entry_length = 0; int chars_sofar = 0; int length; int chars_read; int rj; /* ** termset fields */ int term_no; int bss_set; int np; int little_r; double weight; double rsv; char term_type; char attribute; char parsed[MAX_TERM_LENGTH]; char source[MAX_TERM_LENGTH]; char operation; int relj; char *tbuf; tbuf = termset_entry; entry_length = strlen(tbuf); sscanf(tbuf, "%d:%n", &term_no, &chars_read); tbuf += chars_read; chars_sofar += chars_read; valid = (term_no > 0 && chars_sofar <= entry_length); if (valid) { (next_term + i)->term_no = term_no; sscanf(tbuf, "%d:%n", &bss_set, &chars_read); tbuf += chars_read; chars_sofar += chars_read; valid = ((bss_set >= 0 && bss_set < 16384) && (chars_sofar <= entry_length)); if (valid) { (next_term + i)->bss_set = bss_set; sscanf(tbuf, "%d:%n", &np, &chars_read); tbuf += chars_read; chars_sofar += chars_read; valid = (chars_sofar <= entry_length); if (valid) { (next_term + i)->np = np; sscanf(tbuf, "%d:%n", &little_r, &chars_read); tbuf += chars_read; chars_sofar += chars_read; valid = (little_r >= 0 && chars_sofar <= entry_length); if (valid) { (next_term + i)->little_r = little_r; sscanf(tbuf, "%lf:%n", &weight, &chars_read); tbuf += chars_read; chars_sofar += chars_read; valid = (chars_sofar <= entry_length); if (valid) { (next_term + i)->weight = weight; sscanf(tbuf, "%lf:%n", &rsv, &chars_read); tbuf += chars_read; chars_sofar += chars_read; valid = (chars_sofar <= entry_length); if (valid) { (next_term + i)->rsv = rsv; sscanf(tbuf, "%c:%n", &term_type, &chars_read); tbuf += chars_read; valid = ((strchr("EU", toupper(term_type)) != NULL) && (chars_sofar <= entry_length)); if (valid) { (next_term + i)->term_type = term_type; sscanf(tbuf, "%c:%n", &attribute, &chars_read); tbuf += chars_read; chars_sofar += chars_read; valid = ((strchr(LEGAL_GSL, toupper(term_type)) != NULL) && (chars_sofar <= entry_length)); if (valid) { (next_term + i)->attribute = attribute; length = strchr(tbuf, ':') - tbuf; valid = (length > 0); if (valid) { /* ** Copy length characters from tbuf to source */ memcpy(source, tbuf, length); source[length] = '\0'; de_space(source); sprintf((next_term + i)->source, "%s", source); tbuf += length + 1; chars_sofar += length + 1; /* ** Now determine length of parsed */ length = strchr(tbuf, ':') - tbuf; valid = (length > 0); if (valid) { /* ** Copy length characters from tbuf to parsed */ memcpy(parsed, tbuf, length); parsed[length] = '\0'; sprintf((next_term + i)->parsed, "%s", parsed); tbuf += length + 1; chars_sofar += length + 1; valid = (chars_sofar <= entry_length); if (valid) { sscanf(tbuf, "%c%n", &operation, &chars_read); tbuf += chars_read; chars_sofar += length + 1; valid = ((strchr(ALL_OPS, toupper(operation)) != NULL) && (chars_sofar <= entry_length)); if (valid) { (next_term + i)->operation = operation; /* ** Now read relj info from end of record */ valid = (strlen(tbuf) == (2 * BIGR)); if (valid) { for (rj = 0; rj < BIGR; rj++) { sscanf(tbuf, ":%d%n", &relj, &chars_read); tbuf += chars_read; valid = ((relj == 0) || (relj == 1)); if (valid) { (next_term + i)->relj[rj] = relj; } else { break; } } } } } } } } } } } } } } } if (valid) { sprintf((next_term + i)->source, "%s", source); (next_term + i)->operation = operation; } return valid; } void blank_term(int term_no) { (termset + term_no)->term_no = NONE_ASSIGNED; (termset + term_no)->bss_set = NONE_ASSIGNED; (termset + term_no)->np = 0; (termset + term_no)->little_r = 0; (termset + term_no)->weight = 0.0; (termset + term_no)->rsv = 0.0; (termset + term_no)->term_type = NO_OP; (termset + term_no)->attribute = NO_OP; (termset + term_no)->parsed[0] = '\0'; (termset + term_no)->source[0] = '\0'; (termset + term_no)->operation = NO_OP; } void rename_logs(int query_pid) { /* ** Rename the history, termset and user rels files */ sprintf(OS_COMMAND, "%s %s %s/%s/%s", MV, history_file, LOGS_DIR, TOPIC_NAME, HISTORY_FILE); if (system(OS_COMMAND) < 0) { perror(OS_COMMAND); } sprintf(OS_COMMAND, "%s %s %s/%s/%s", MV, termset_file, LOGS_DIR, TOPIC_NAME, TERMSET_FILE); if (system(OS_COMMAND) < 0) { perror(OS_COMMAND); } sprintf(OS_COMMAND, "%s %s %s/%s/%s", MV, rels_file, LOGS_DIR, TOPIC_NAME, RELS_FILE); if (system(OS_COMMAND) < 0) { perror(OS_COMMAND); } } void terminate_command_entry(int nfields) { int f; for (f = 0; f < nfields; f++) { fprintf(hf, ":\"\""); if (f == nfields - 1) fprintf(hf, "\n"); fflush(hf); } } void de_space(char query[]) { /* ** Remove spaces from the front and back of a string */ int length; char *sbuf; /* ** Front */ sbuf = query; while (*sbuf == ' ') { sbuf++; } sprintf(query, "%s", sbuf); /* ** Back */ do { length = strlen(query) - 1; if (query[length] == ' ') { query[length] = '\0'; length--; } } while (query[length] == ' '); } int shut_down() { iexit(); } int check_through_terms(char parsed[], char user_type) { /* ** Check to see if parsed already in list. ** Make sure operation field set to uppercase if it's there and np > 0 ** ** Set there to NONE_ASSIGNED by default. ** If it's there set there to the term_no in (termset) */ int current_term; int existing_term_no = NONE_ASSIGNED; for (current_term = 0; current_term < TERM_NO; current_term++) { if (strcmp(parsed, (termset + current_term)->parsed) == 0) { if ((termset + current_term)->np > 0) { /* ** Reset opcode field to uppercase */ (termset + current_term)->operation = toupper((termset + current_term)->operation); /* ** Only need to re-calculate little_r for phrases. If it's a ** single term it will be correct anyway (incremented if ** necessary in the extract_terms() function. */ (termset + current_term)->weight = calc_wgt((termset + current_term)->np, (termset + current_term)->little_r, (termset + current_term)->term_type); (termset + current_term)->rsv = calc_rsv((termset + current_term)->weight, (termset + current_term)->little_r, (termset + current_term)->term_type); (termset + current_term)->term_type = user_type; existing_term_no = current_term; break; } } } return existing_term_no; } void history_file_error(char *TABLE) { fprintf(hf, "%d:%d:%d:\"error\":\"%s\":\"%s\"\n", ++COMMAND_NO, TOPIC_NO, ELAPSED_TIME, TABLE, ERROR_MESSAGE); } void screen_error() { fprintf(stderr, " %s\n", ERROR_MESSAGE); } void tclsh_error(char *error_message) { printf("\n\n*******************************************************\n"); printf("* *\n"); printf("%s\n", error_message); printf("* *\n"); printf("*******************************************************\n\n"); } void hf_header(int command_no, char *command, int extra_parameters) { fprintf(hf, "%d:%d:%d:%s", command_no, TOPIC_NO, ELAPSED_TIME, command); if (extra_parameters == NO_PARAMS) { fprintf(hf, "\n"); } } void shutdown_application() { fclose(hf); fclose(tf); iexit(); exit(0); } void terminate_run(char *calling_fcn, char *command, int return_code) { char bss_command[SMALL_BUFFER]; char bss_results[SMALL_BUFFER]; if ((strcmp(command, "FINISHED") == 0) || (strcmp(command, "quit") == 0)) { printf(" RUN FINISHED SUCCESSFULLY\n\n"); shutdown_application(); } else { sprintf(bss_command, "perror"); i0(bss_command, bss_results); fprintf(stderr, "Calling function [%s]\n\n", calling_fcn); if (return_code < 0) { fprintf(stderr, "There was an ERROR executing the command:\n\n"); } else { fprintf(stderr, "There was a WARNING executing the command:\n\n"); } fprintf(stderr, "%s\n\n", command); fprintf(stderr, "BSS error: [%s] (%d)\n", bss_results, return_code); fprintf(stderr, "BSS_PARMPATH = [%s]\n", getenv("BSS_PARMPATH")); if (return_code < 0) { fprintf(stderr, "The program run will be terminated.\n"); shutdown_application(); } } } void clear_structure(struct query_list_lines r[]) { r = NULL; } void find_docset(char docid[], int *set, int *np) { int debugging = atoi(getenv("FIND_DOCSET_DEBUG")); int return_code; char bss_command[SMALL_BUFFER]; char bss_result[SMALL_BUFFER]; char *rbuf; sprintf(bss_command, "f a=dn t=%s", docid); if (debugging) { fprintf(stderr, "find_docset():: BSS_PARMPATH = [%s]\n", getenv("BSS_PARMPATH")); fprintf(stderr, "find_docset():: bss_command = [%s]\n", bss_command); } if ((return_code = i0(bss_command, bss_result)) != 0) { terminate_run("find_docset()", bss_command, return_code); } rbuf = bss_result; sscanf(rbuf, "S%d np=%d", set, np); if (debugging) { fprintf(stderr, "find_docset():: set = %d, np = %d\n", *set, *np); sprintf(bss_command, "show f=0 s=%d r=0", *set); if ((return_code = i0(bss_command, bss_result)) != 0) { terminate_run("find_docset()", bss_command, return_code); } fprintf(stderr, "find_docset():: set = %d, np = %d, docid = [%s]\n", *set, *np, bss_result); } } void check_against_rels(int term_no) { int return_code; int ch; int np; int relj; int rel_set; int temp_set; char docid[MAX_DOCID_LENGTH]; char bss_command[SMALL_BUFFER]; char bss_result[SMALL_BUFFER]; char *rbuf; for (relj = 0; relj < BIGR; relj++) { /* ** Used to find set for document using the docid. Now use the ** internal record number ((user_rels + relj)->abs_recno). ** ** 20/2/97: Reverted to (user_rels + relj)->docid since needs to ** be useable across databases. */ find_docset((user_rels + relj)->docid, &rel_set, &np); if (np == 0) { /* ** relj no longer in the database */ (termset + term_no)->relj[relj] = NONE_ASSIGNED; break; } /* ** The term already has a set -- (termset)->bss_set ** ** AND this set with the rel_set to see if it's in it */ sprintf(bss_command, "f s=%d s=%d op=and", rel_set, (termset + term_no)->bss_set); if ((return_code = i0(bss_command, bss_result)) != 0) { terminate_run("check_against_rels()", bss_command, return_code); } rbuf = bss_result; sscanf(rbuf, "S%d np=%d", &temp_set, &np); if (np == 0) { /* ** Not in relj */ (termset + term_no)->relj[relj] = 0; } else { (termset + term_no)->relj[relj] = 1; } } } int check_for_paragraph_file() { int debugging = atoi(getenv("CHECK_FOR_PARAGRAPH_FILE_DEBUG")); int buffer_size; char *para_file; FILE *pf; /* ** set buffer size large enough for filename */ buffer_size = strlen(params_file.bib_dir) + strlen(params_file.name) + 16; if ((para_file = (char *) malloc(buffer_size)) == NULL) { malloc_error("utilities.c", "check_for_paragraph_file()", "para_file", buffer_size); } else if (debugging) { malloc_success("utilities.c", "check_for_paragraph_file()", "para_file", buffer_size); } sprintf(para_file, "%s/%s.par", params_file.bib_dir, params_file.name); if ((pf = fopen(para_file, "r")) == NULL) { fprintf(stderr, "No paragraph file [%s]\n", para_file); params_file.para_file = FALSE; } else { fclose(pf); sprintf(para_file, "%s/%s.pardir", params_file.bib_dir, params_file.name); if ((pf = fopen(para_file, "r")) == NULL) { fprintf(stderr, "No paragraph file [%s]\n", para_file); params_file.para_file = FALSE; } else { fclose(pf); params_file.para_file = TRUE; } } } void show_parameter_files() { int loop_var; printf("name = %s\n", params_file.name); printf("lastbibvol = %d\n", params_file.lastbibvol); printf("bib_basename = %s\n", params_file.bib_basename); printf("bib_dir = %s\n", params_file.bib_dir); printf("bibsize = %d\n", params_file.bibsize); printf("real_bibsize = %d\n", params_file.real_bibsize); printf("display_name = %s\n", params_file.display_name); printf("explanation = %s\n", params_file.explanation); printf("nr = %d\n", params_file.nr); printf("nf = %d\n", params_file.nf); for (loop_var = 0; loop_var < params_file.nf; loop_var++) { printf("%5d f_abbrev = %s\n", loop_var, params_file.f_abbrev[loop_var]); } printf("\n"); printf("rec_mult = %d\n", params_file.rec_mult); printf("fixed = %d\n", params_file.fixed); printf("db_type = %s\n", params_file.db_type); printf("has_lims = %d\n", params_file.has_lims); printf("maxreclen = %d\n", params_file.maxreclen); printf("ni = %d\n\n", params_file.ni); for (loop_var = 0; loop_var < params_file.ni; loop_var++) { printf("%5d last_ixvol = %d\n", loop_var, params_file.last_ixvol[loop_var]); printf(" ix_stem = %s\n", params_file.ix_stem[loop_var]); printf(" ix_volsize = %d\n", params_file.ix_volsize[loop_var]); printf(" ix_type = %d\n", params_file.ix_type[loop_var]); } printf("\n"); if (params_file.para_file) { printf("Paragraph file exists.\n"); } else { printf("No Paragraph File.\n"); } } void read_parameter_files() { int debugging = atoi(getenv("READ_PARAMETER_FILES_DEBUG")); int length; int field_no = 0; int index_no = 0; char parameter[V_SMALL_BUFFER]; char value[V_SMALL_BUFFER]; char db_params_file[SMALL_BUFFER]; FILE *pf; char temp[SMALL_BUFFER]; char *tbuf; sprintf(db_params_file, "%s/%s", getenv("BSS_PARMPATH"), DATABASE_NAME); if ((pf = fopen(db_params_file, "r")) == NULL) { fprintf(stderr, "Can't open parameter file [%s]\n", db_params_file); exit(-1); } else { while (!feof(pf)) { if (fgets(temp, SMALL_BUFFER, pf) == NULL) { break; } else { temp[strlen(temp) - 1] = '\0'; tbuf = temp; length = strchr(tbuf, '=') - tbuf; memcpy(parameter, tbuf, length); parameter[length] = '\0'; tbuf += length + 1; sprintf(value, "%s", tbuf); if (debugging) { fprintf(stderr, "read_parameter_files():: %s = %s\n", parameter, value); } if (strcmp(parameter, "name") == 0) { sprintf(params_file.name, "%s", value); } else if (strcmp(parameter, "lastbibvol") == 0) { params_file.lastbibvol = atoi(value); } else if (strcmp(parameter, "bib_basename") == 0) { sprintf(params_file.bib_basename, "%s", value); } else if (strcmp(parameter, "bib_dir") == 0) { sprintf(params_file.bib_dir, "%s", value); } else if (strcmp(parameter, "bibsize") == 0) { params_file.bibsize = atoi(value); } else if (strcmp(parameter, "real_bibsize") == 0) { params_file.real_bibsize = atoi(value); } else if (strcmp(parameter, "display_name") == 0) { sprintf(params_file.display_name, "%s", value); } else if (strcmp(parameter, "explanation") == 0) { sprintf(params_file.explanation, "%s", value); } else if (strcmp(parameter, "nr") == 0) { params_file.nr = atoi(value); } else if (strcmp(parameter, "nf") == 0) { params_file.nf = atoi(value); } else if (strcmp(parameter, "f_abbrev") == 0) { sprintf(params_file.f_abbrev[index_no++], "%s", value); } else if (strcmp(parameter, "rec_mult") == 0) { params_file.rec_mult = atoi(value); } else if (strcmp(parameter, "fixed") == 0) { params_file.fixed = atoi(value); } else if (strcmp(parameter, "db_type") == 0) { sprintf(params_file.db_type, "%s", value); } else if (strcmp(parameter, "has_lims") == 0) { params_file.has_lims = atoi(value); } else if (strcmp(parameter, "maxreclen") == 0) { params_file.maxreclen = atoi(value); } else if (strcmp(parameter, "ni") == 0) { params_file.ni = atoi(value); } else if (strcmp(parameter, "last_ixvol") == 0) { params_file.last_ixvol[index_no] = atoi(value); } else if (strcmp(parameter, "ix_stem") == 0) { sprintf(params_file.ix_stem[index_no], "%s", value); } else if (strcmp(parameter, "ix_volsize") == 0) { params_file.ix_volsize[index_no] = atoi(value); } else if (strcmp(parameter, "ix_type") == 0) { params_file.ix_type[index_no++] = atoi(value); } } } fclose(pf); } check_for_paragraph_file(); if (debugging) { show_parameter_files(); } } int open_database(char *database_name, int old_seen) { char db_filename[FILENAME_SIZE]; char dbname[MAX_DBNAME_LENGTH]; char temp[MAX_DBNAME_LENGTH]; FILE *db; int database_open = FALSE; int return_code; int length; char bss_command[SMALL_BUFFER]; char bss_result[SMALL_BUFFER]; char *rbuf; /* ** First read default database name and put into dbname[] */ sprintf(db_filename, "%s/.okapi_db", getenv("GUI_CONFIG_FILES")); if ((db = fopen(db_filename, "r")) == NULL) { fprintf(stderr, "There is no file \".okapi_db\" in [%s]\n\n", getenv("GUI_CONFIG_FILES")); fprintf(stderr, "This file must exist and contain a single line\n"); fprintf(stderr, "specifying the default database name.\n"); exit(0); } fgets(temp, MAX_DBNAME_LENGTH, db); temp[strlen(temp) - 1] = '\0'; sprintf(dbname, "%s", temp); sprintf(DATABASE_NAME, "%s", temp); fprintf(stderr, "Opening Database [%s] ..... ", DATABASE_NAME); sprintf(bss_command, "choose %s", DATABASE_NAME); hf_header(COMMAND_NO, "open_database", PARAMS); fprintf(hf, ":%s:", DATABASE_NAME); if (database_open = (i0(bss_command, bss_result) == 0)) { fprintf(hf, "OK\n"); fprintf(stderr, "OK\n"); fprintf(stderr, "Relevance Feedback is ["); /* ** Write RF state to history file */ hf_header(COMMAND_NO++, "relevance_feedback", PARAMS); if (FEEDBACK) { fprintf(hf, ":ON\n"); fprintf(stderr, "ON]\n"); } else { fprintf(hf, ":OFF\n"); fprintf(stderr, "OFF]\n"); } read_parameter_files(); } else { fprintf(hf, "ERROR\n"); printf("ERROR\n"); iexit(); /* exit(0);*/ } return database_open; } int read_topic_names(struct logfile_info *dirname) { int buffer_size; int no_entries = 0; int max_dir_entries = 1024; char full_entry[FILENAME_SIZE]; struct dirent *entry_ptr; struct stat entry_info; DIR *dirp; if ((dirp = opendir(getenv("OKAPI_LOGS_DIR"))) == NULL) { access_error(getenv("OKAPI_LOGS_DIR")); } buffer_size = max_dir_entries * sizeof(struct dirent); if ((entry_ptr = (struct dirent *) malloc(buffer_size)) == NULL) { malloc_error("utilities.c", "read_topic_names", "entry_ptr", buffer_size); } while ((entry_ptr = readdir(dirp)) != NULL) { sprintf(full_entry, "%s/%s", LOGS_DIR, entry_ptr->d_name); stat(full_entry, &entry_info); if (S_ISDIR(entry_info.st_mode)) { if ((strcmp(entry_ptr->d_name, ".") != 0) && (strcmp(entry_ptr->d_name, "..") != 0)) { sprintf((dirname + no_entries)->logfile, "%s", entry_ptr->d_name); no_entries++; } } } closedir(dirp); free(entry_ptr); return no_entries; } void convert_date(char *ndate, char received_date[]) { /* ** Formats: ** ** numeric_date -> received_date: YYMMDD -> DD-MON-YY */ int date; int length; int ym_len = 2; int mabbr_len = 3; char year[ym_len]; char num_month[ym_len]; char month[mabbr_len]; memcpy(year, ndate, ym_len); year[ym_len] = '\0'; ndate += 2; memcpy(num_month, ndate, ym_len); num_month[ym_len] = '\0'; sprintf(month, "%s", months[atoi(num_month) - 1]); ndate += 2; sprintf(received_date, "%s-%s-%s", ndate, month, year); } void initialise_global_variables() { struct timeval tp; struct timezone tzp; ITERATION_NO = SHOW_SET = NONE_ASSIGNED; SHOW_SET_RECNO = 0; COMMAND_NO = 0; TERM_NO = NO_REMOVED_TERMS = 0; RELJ_COUNT = USER_RELS_COUNT = 0; NEW_TERMS = FALSE; HIT_COUNT = SHOW_COUNT = 0; RJ_COUNT = 0; EDITING_TOPIC = FALSE; EXTRACTED_TERMS = FALSE; NEW_ITERATION = TRUE; FINAL_SEARCH = FALSE; ELAPSED_TIME = 0; CLEAR_RF = FALSE; QUERY_SIZE = 20; LR_THRESHOLD = 2; RSV_FACTOR = 0.6; gettimeofday(&tp, &tzp); LAST_TIME = tp.tv_sec; printf("Initialised global variables.\n"); sprintf(LAST_COMMAND, "START"); } void bss_find(char parsed[], int *set, int *npostings) { int return_code; char bss_command[SMALL_BUFFER]; char bss_result[SMALL_BUFFER]; char *rbuf; sprintf(bss_command, "f t=%s", parsed); if ((return_code = i0(bss_command, bss_result)) != 0) { terminate_run("bss_find()", bss_command, return_code); } rbuf = bss_result; sscanf(rbuf, "S%d np=%d", set, npostings); } /* ** Finding new BIGR and SEEN_SET when changing query ** ** 1. Make an empty BIGR set */ void make_new_bigR_set() { int return_code; char bss_command[SMALL_BUFFER]; char bss_result[SMALL_BUFFER]; char *rbuf; sprintf(bss_command, "find t=zzzzzzzzzzzzzzzzzzzz"); if ((return_code = i0(bss_command, bss_result)) != 0) { terminate_run("make_new_bigR_set()", bss_command, return_code); } rbuf = bss_result; sscanf(rbuf, "S%d np=%d", &BIGR_SET, &BIGR); if (BIGR > 0) { terminate_run("make_new_bigR_set()", bss_command, return_code); } } /* ** 2. Make an empty SEEN_SET */ void make_new_seen_set() { int return_code; char bss_command[SMALL_BUFFER]; char bss_result[SMALL_BUFFER]; char *rbuf; sprintf(bss_command, "find t=zzzzzzzzzzzzzzzzzzzz"); if ((return_code = i0(bss_command, bss_result)) != 0) { terminate_run("make_new_seen_set()", bss_command, return_code); } rbuf = bss_result; sscanf(rbuf, "S%d np=%d", &SEEN_SET, &NO_SEEN); if (NO_SEEN > 0) { terminate_run("make_new_seen_set()", bss_command, return_code); } } void make_RS_sets(int old_seen) { /* ** Make empty BIGR and SEEN sets. ** ** If there is an existing query in which there are viewed docs, ** add these to SEEN_SET is they exist in the new database, ** and to the BIGR_SET if they are +ve rels. */ int return_code; int docid_set; int set; int np; int next_rel; int dc = 0; char docid[MAX_DOCID_LENGTH]; char bss_command[SMALL_BUFFER]; char bss_result[SMALL_BUFFER]; char *rbuf; /* ** Create empty BIGR_SET */ make_new_bigR_set(); /* ** Create empty SEEN_SET */ make_new_seen_set(); /* ** Now check to see if there is an existing query. ** If so add any rels and seen docs from the query ** to the new BIGR and SEEN sets if they exist in ** the new database. */ if (old_seen && USER_RELS_COUNT > 0) { /* ** i.e. some docs were viewed from the old database, ** some of which might be +ve rels. ** ** Add each doc (if it exists in the new database) ** to SEEN_SET. If it does exist in the new database ** add it to the BIGR_SET if its relevance judgement ** is R_FULL or R_PASS ** ** NOTE: If old_seen = TRUE then USER_RELS_COUNT > 0 */ for (next_rel = 0; next_rel < USER_RELS_COUNT; next_rel++) { /* ** Find set for next document */ find_docset((user_rels + next_rel)->docid, &docid_set, &np); if (np == 1) { /* ** Doc exists in new database. ** ** - OR old SEEN_SET with docid_set. This forms set containing ** the docid_set whether it's already there or not. ** - determine new value of NO_SEEN, ** - delete old SEEN_SET ** - create new SEEN_SET */ (user_rels + next_rel)->set = docid_set; sprintf(bss_command, "f s=%d s=%d op=or", SEEN_SET, docid_set); if ((return_code = i0(bss_command, bss_result)) != 0) { terminate_run("make_RS_sets()", bss_command, return_code); } rbuf = bss_result; sscanf(rbuf, "S%d np=%d", &set, &NO_SEEN); sprintf(bss_command, "delete %d", SEEN_SET); if ((return_code = i0(bss_command, bss_result)) != 0) { terminate_run("make_RS_sets()", bss_command, return_code); } SEEN_SET = set; if ((user_rels + next_rel)->rj != R_NOT) { /* ** It is a +ve rel */ sprintf(bss_command, "f s=%d s=%d op=or", BIGR_SET, docid_set); if ((return_code = i0(bss_command, bss_result)) != 0) { terminate_run("make_RS_sets()", bss_command, return_code); } rbuf = bss_result; sscanf(rbuf, "S%d np=%d", &set, &BIGR); sprintf(bss_command, "delete %d", BIGR_SET); if ((return_code = i0(bss_command, bss_result)) != 0) { terminate_run("make_RS_sets()", bss_command, return_code); } BIGR_SET = set; } } else { (user_rels + next_rel)->set = NONE_ASSIGNED; } } } } void set_user_rels_flag(int sflag) { /* ** Mainly used to set (user_rels + )->sflag to 'N' to ** indicate that this was a rel made before the ** relevance information was cleared from the query */ int rel_count; for (rel_count = 0; rel_count < USER_RELS_COUNT; rel_count++) { (user_rels + rel_count)->sflag = sflag; } } void access_error(char filename[]) { char *error_message; if ((error_message = (char *) malloc(FILENAME_SIZE)) == NULL) { malloc_error("utilities", "access_error", "error_message", FILENAME_SIZE); } sprintf(error_message, "Cannot access directory/filename:\n\n [%s]\n", filename); printf("\n\n*******************************************************\n"); printf("** **\n\n"); printf("%s\n", error_message); printf("** **\n\n"); printf("*******************************************************\n"); free(error_message); exit(0); }