When working with natural language, some operations are required time and again. Conversational computing requires the ability to convert a phrase between the first and second person, as in "I am going to Italy for my holidays" and its response "Why are you going to Italy for your holidays?". Sometimes you will need to convert a phrase into a question. In doing so you may encounter the problem of "DO NOT", as in "I do not like fish", which as an interrogative response is "Why do you not like fish?". Notice that the first person "I" has been changed to the second person "you", and that the phrase "I do not" has been rearranged to "Why do you not".
Include files for an NLP toolbox:
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Truncating a string by a specified length; */
/*
 * Truncate a string by dropping its last num characters.
 *
 * p   - NUL-terminated string, modified in place.
 * num - number of trailing characters to remove. Zero or negative values
 *       leave the string unchanged; a value equal to or larger than the
 *       string length leaves an empty string.
 */
void truncstr(char *p, int num)
{
    size_t len;

    if (num <= 0)
        return;

    len = strlen(p);
    if ((size_t)num >= len)
        p[0] = '\0';                    /* asked to drop everything */
    else
        p[len - (size_t)num] = '\0';
}

/* Changing a string's length by inserting or deleting characters; */
/*
 * Change the length of a string at its front.
 *
 * p   - NUL-terminated string, modified in place.
 * num - if positive, the whole string (terminator included) is shifted num
 *       bytes towards higher addresses, opening a gap of num bytes at p;
 *       the gap keeps whatever bytes were there before, and the caller must
 *       guarantee the buffer is large enough.
 *       If negative, the first -num characters are deleted; asking for more
 *       than the string holds leaves an empty string.
 *       Zero leaves the string untouched.
 */
void strlench(char *p, int num)
{
    size_t len = strlen(p);

    if (num > 0) {
        memmove(p + num, p, len + 1);
    } else if (num < 0) {
        /* Magnitude via unsigned arithmetic so that INT_MIN is safe too. */
        size_t drop = (size_t)0 - (size_t)num;

        if (drop > len)
            drop = len;
        /* Move only the surviving tail plus its terminator; never read
         * beyond the end of the original string. */
        memmove(p, p + drop, len - drop + 1);
    }
}

/* Inserting one string into another string; */
/*
 * Insert string q at position p.
 *
 * p - points at the insertion position inside a NUL-terminated string; the
 *     text from p onwards is shifted right to make room. The caller must
 *     guarantee the buffer can hold strlen(q) additional bytes.
 * q - NUL-terminated text to insert; it is not modified and must not
 *     overlap the destination.
 */
void strins(char *p, const char *q)
{
    size_t gap = strlen(q);

    /* Open a gap of the right size, carrying the terminator along ... */
    memmove(p + gap, p, strlen(p) + 1);
    /* ... then drop q into it without its terminator. */
    memcpy(p, q, gap);
}

/* Replacing all occurrences of one substring within a string with another substring; */
/*
 * Replace every occurrence of s1 in data with s2.
 *
 * data - NUL-terminated string, modified in place. When s2 is longer than
 *        s1 the caller must guarantee the buffer can hold the result.
 * s1   - text to look for; an empty s1 leaves data unchanged.
 * s2   - replacement text.
 *
 * Matches are found left to right and the search resumes after each inserted
 * replacement, so a replacement that itself contains s1 is not expanded
 * again (which would never terminate).
 */
void strchg(char *data, const char *s1, const char *s2)
{
    size_t old_len = strlen(s1);
    size_t new_len = strlen(s2);
    char *hit;

    if (old_len == 0)
        return;                         /* an empty pattern matches everywhere */

    hit = strstr(data, s1);
    while (hit != NULL) {
        /* Resize the slot occupied by s1 to fit s2, moving the tail. */
        if (new_len != old_len)
            memmove(hit + new_len, hit + old_len, strlen(hit + old_len) + 1);
        memcpy(hit, s2, new_len);

        hit = strstr(hit + new_len, s1);
    }
}

/* Switching between first and second person references in a string; */
/* One substitution rule used by strftos(). */
struct ftos_rule {
    const char *from;      /* text matched at the start of a word */
    const char *to;        /* replacement used in mid-sentence */
    const char *to_start;  /* replacement at the start of a sentence,
                              or NULL to use `to` everywhere */
    int open_ended;        /* non-zero: `from` must be followed by a space,
                              punctuation or the end of the string, and that
                              following character is copied through */
};

/* True when c can follow a complete word: end of string, space or punctuation. */
static int ftos_word_end(char c)
{
    return c == '\0' || c == ' ' || ispunct((unsigned char)c);
}

/* True when word is the first word of text or follows a '.', '!' or '?'
 * (ignoring the spaces in between). */
static int ftos_sentence_start(const char *text, const char *word)
{
    while (word > text && word[-1] == ' ')
        word--;
    if (word == text)
        return 1;
    return word[-1] == '.' || word[-1] == '!' || word[-1] == '?';
}

/*
 * Swap first person and second person references in a phrase.
 *
 *   I ---------> YOU          YOU -------> I (sentence start) / ME
 *   ME --------> YOU          YOUR ------> MY
 *   MY --------> YOUR         YOU ARE ---> I AM
 *   I AM ------> YOU ARE      YOU WERE --> I WAS
 *   I WAS -----> YOU WERE
 *
 * Upper-case, capitalised and lower-case spellings are handled separately so
 * that the replacement follows the case of the text around it. Only the
 * start of each space-separated word is examined.
 *
 * data - NUL-terminated phrase, converted in place. The result can be up to
 *        twice as long as the input ("I " becomes "YOU "), so the caller
 *        must provide a buffer with room for the converted text.
 *
 * Returns data on success, or NULL if working memory could not be allocated
 * (data is then left unchanged).
 */
char *strftos(char *data)
{
    /* Longer phrases come before the words they start with. */
    static const struct ftos_rule rules[] = {
        { "I AM",      "YOU ARE",   NULL,        1 },
        { "I am",      "you are",   "You are",   1 },
        { "I WAS ",    "YOU WERE ", NULL,        0 },
        { "I was ",    "you were ", "You were ", 0 },
        { "YOU ARE",   "I AM",      NULL,        1 },
        { "You are",   "I am",      NULL,        1 },
        { "you are",   "I am",      NULL,        1 },
        { "YOU WERE ", "I WAS ",    NULL,        0 },
        { "You were ", "I was ",    NULL,        0 },
        { "you were ", "I was ",    NULL,        0 },
        { "YOUR ",     "MY ",       NULL,        0 },
        { "Your ",     "My ",       NULL,        0 },
        { "your ",     "my ",       NULL,        0 },
        { "YOU",       "ME",        "I",         1 },
        { "You",       "I",         NULL,        1 },
        { "you",       "me",        NULL,        1 },
        { "ME",        "YOU",       NULL,        1 },
        { "Me",        "You",       NULL,        1 },
        { "me",        "you",       NULL,        1 },
        { "MY ",       "YOUR ",     NULL,        0 },
        { "My ",       "Your ",     NULL,        0 },
        { "my ",       "your ",     NULL,        0 }
    };
    const size_t rule_count = sizeof rules / sizeof rules[0];
    size_t len = strlen(data);
    char *work;                 /* input plus one trailing space */
    char *copy;                 /* converted output */
    const char *src;
    char *dst;

    /* A trailing space lets the last word match rules that end in a space.
     * It goes into a private copy so the caller's buffer is only written
     * once the conversion has succeeded. No rule more than doubles the text
     * it consumes, hence the 2x bound (plus the terminator). */
    work = malloc(len + 2);
    copy = malloc(2 * (len + 1) + 1);
    if (work == NULL || copy == NULL) {
        free(work);
        free(copy);
        return NULL;
    }
    memcpy(work, data, len);
    work[len] = ' ';
    work[len + 1] = '\0';

    src = work;
    dst = copy;
    while (*src != '\0') {
        int matched = 0;
        size_t i;

        for (i = 0; i < rule_count; i++) {
            const struct ftos_rule *rule = &rules[i];
            size_t from_len = strlen(rule->from);
            const char *to = rule->to;
            size_t to_len;

            if (strncmp(src, rule->from, from_len) != 0)
                continue;
            if (rule->open_ended && !ftos_word_end(src[from_len]))
                continue;       /* only a prefix of a longer word */

            if (rule->to_start != NULL && ftos_sentence_start(work, src))
                to = rule->to_start;

            to_len = strlen(to);
            memcpy(dst, to, to_len);
            dst += to_len;
            src += from_len;
            /* Carry over the space or punctuation that ended the word. */
            if (rule->open_ended && *src != '\0')
                *dst++ = *src++;
            matched = 1;
            break;
        }

        /* Plain "I": take the case from the word that follows it. */
        if (!matched && src[0] == 'I' && src[1] == ' ') {
            const char *next = src + 2;
            const char *to;
            size_t to_len;

            while (*next == ' ')
                next++;
            if (!islower((unsigned char)*next))
                to = "YOU ";
            else if (ftos_sentence_start(work, src))
                to = "You ";
            else
                to = "you ";

            to_len = strlen(to);
            memcpy(dst, to, to_len);
            dst += to_len;
            src += 2;
            matched = 1;
        }

        /* No rule applies: copy the word through unchanged. */
        if (!matched) {
            while (*src != '\0' && *src != ' ')
                *dst++ = *src++;
        }

        /* Copy the separators leading up to the next word. */
        while (*src != '\0' && strchr(" ,.:;?!", *src) != NULL)
            *dst++ = *src++;
    }

    /* Remove the trailing space that was added above. */
    if (dst > copy && dst[-1] == ' ')
        dst--;
    *dst = '\0';

    strcpy(data, copy);
    free(work);
    free(copy);
    return data;
}

/* Correcting grammatical errors which may occur when converting phrases; */
/*
 * Correct common grammatical errors left behind by person conversion.
 *
 *   "WHY YOU DO NOT" becomes "WHY DO YOU NOT"
 *   "WHY ME DO NOT"  becomes "WHY DO I NOT"
 *
 * data - NUL-terminated string, corrected in place. Only the first
 *        occurrence of each phrase is changed, and only the upper-case
 *        spelling is recognised. The string never grows.
 */
void strcor(char *data)
{
    static const char you_from[] = "WHY YOU DO NOT";
    static const char you_to[]   = "WHY DO YOU NOT";
    static const char me_from[]  = "WHY ME DO NOT";
    static const char me_to[]    = "WHY DO I NOT";
    char *hit;

    /* Same length: overwrite in place. */
    hit = strstr(data, you_from);
    if (hit != NULL)
        memcpy(hit, you_to, sizeof you_to - 1);

    /* The replacement is one character shorter: overwrite, then pull the
     * rest of the string (whatever it starts with) down to close the gap. */
    hit = strstr(data, me_from);
    if (hit != NULL) {
        const size_t from_len = sizeof me_from - 1;
        const size_t to_len = sizeof me_to - 1;

        memcpy(hit, me_to, to_len);
        memmove(hit + to_len, hit + from_len, strlen(hit + from_len) + 1);
    }
}

/* Converting a statement into a 'WHY?' question; */
/*
 * Turn a statement into a "WHY ...?" question.
 *
 * "WHY " is inserted at the front, strcor() tidies the resulting word order,
 * everything from the first ',', '.', ':', ';' or '!' onwards is discarded,
 * and "? " (question mark plus a space) is appended.
 *
 * data - NUL-terminated string, rewritten in place; the caller must leave
 *        room for the extra characters.
 */
void strwhy(char *data)
{
    char *cut;

    strins(data, "WHY ");
    strcor(data);

    /* Keep only the first clause of the statement. */
    cut = strpbrk(data, ",.:;!");
    if (cut != NULL)
        *cut = '\0';

    /* Once the clause has been cut, no '.' or '!' is left in the string, so
     * the question mark always goes on the end. */
    strcat(data, "? ");
}

/* Locate a substring within a string, and return a pointer to the first word following that substring; */
/*
 * Locate s2 inside s1 and return a pointer to the first word after it.
 *
 * s1 - NUL-terminated string to search.
 * s2 - NUL-terminated text to look for.
 *
 * Returns a pointer into s1 just past the first occurrence of s2 and any
 * spaces that follow it, or NULL when s2 does not occur in s1.
 */
char *strrstr(char *s1, char *s2)
{
    char *after = strstr(s1, s2);

    if (after == NULL)
        return NULL;

    after += strlen(s2);
    while (*after == ' ')
        after++;
    return after;
}

/* A demonstration function which calls the toolbox functions; */
main() { char data[80]; char *p; clrscr(); strcpy(data,"My name is Michael Caine!"); p = strftos(data); if (p == NULL) printf("\nUnable to change string"); else printf("\n%s",data); strcpy(data,"My boyfriend made me come to see you."); p = strftos(data); if (p == NULL) printf("\nUnable to change string"); else printf("\n%s",data); strcpy(data,"I used to think that I was ugly, but now I am not so sure"); p = strftos(data); if (p == NULL) printf("\nUnable to change string"); else printf("\n%s",data); strcpy(data,"I think you are a bit of a wally!"); p = strftos(data); if (p == NULL) printf("\nUnable to change string"); else printf("\n%s",data); strcpy(data,"MY FRIEND MADE ME COME TO SEE YOU"); p = strftos(data); if (p == NULL) printf("\nUnable to change string"); else printf("\n%s",data); strcpy(data,"WHY I DO NOT LIKE YOU ANYMORE"); p = strftos(data); if (p == NULL) printf("\nUnable to change string"); else { strcor(data); printf("\n%s",data); } strcpy(data,"I DO NOT LIKE YOU, ANYMORE, you big prat!"); p = strftos(data); if (p == NULL) printf("\nUnable to change string"); else { strwhy(data); printf("\n%s",data); } strcpy(data,"WHAT IS THE MATTER WITH THE ECONOMY?"); printf("\n%s",strrstr(data,"THE MATTER")); }