TOOLS

When working with natural language, there are some operations which are required time and again. Conversational computing requires the ability to convert a phrase between the first and second person, as in "I am going to Italy for my holidays" and its response "Why are you going to Italy for your holidays?". Sometimes you will need to convert a phrase into a question. In doing so you may encounter the problem of "DO NOT", as in "I do not like fish", which as an interrogative response is "Why do you not like fish?". Notice that the first person "I" has been changed to the second person "you", and that the phrase "I do not" has been rearranged to "Why do you not".

Include files for an NLP toolbox;

#include <conio.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
Truncating a string by a specified length;
void truncstr(char *p,int num)
{
    /* Truncate string by losing last num characters.
     *
     * p   - NUL terminated string, shortened in place
     * num - number of characters to drop from the end. Zero or a negative
     *       count leaves the string unchanged; a count equal to or larger
     *       than the length of the string leaves an empty string.
     */
    size_t len;

    /* Reject non-positive counts before num is compared with a size_t:
       a negative int would otherwise be converted to a huge unsigned value */
    if (num <= 0)
        return;

    len = strlen(p);
    if ((size_t)num >= len)
        p[0] = 0;
    else
        p[len - (size_t)num] = 0;
}
Changing a string's length by inserting or deleting characters;
void strlench(char *p,int num)
{
    /* Change length of string by adding or deleting characters at p.
     *
     * num > 0  - the string and its terminator are moved num bytes to the
     *            right. The first num bytes of p keep their old contents
     *            and form a gap for the caller to fill. The buffer must
     *            have room for num more bytes.
     * num < 0  - the first -num characters are deleted by moving the rest
     *            of the string left. Deleting more characters than the
     *            string holds leaves an empty string.
     * num == 0 - the string is left unchanged.
     */
    size_t len;
    size_t cut;

    len = strlen(p);

    if (num > 0)
        memmove(p + num,p,len + 1);
    else
    if (num < 0)
    {
        /* Magnitude of num, computed unsigned so that INT_MIN is safe */
        cut = (size_t)(0u - (unsigned int)num);
        if (cut > len)
            cut = len;

        /* Move only the surviving tail and its terminator, so nothing
           beyond the end of the string is ever read */
        memmove(p,p + cut,len - cut + 1);
    }
}
Inserting one string into another string;
void strins(char *p, char *q)
{
    /* Put the text of q in front of the text at p.
     *
     * strlench() first moves the string at p to the right by the length
     * of q, then the characters of q (without its terminator) are copied
     * into the gap. The buffer at p must be able to hold the longer string.
     */
    size_t width;

    width = strlen(q);

    /* Open a gap of width bytes at p */
    strlench(p,width);

    /* Fill the gap; the terminator already sits at the end of the old text */
    memcpy(p,q,width);
}
Replacing all occurrences of one substring within a string with another substring;
void strchg(char *data, char *s1, char *s2)
{
    /* Replace all occurrences of s1 with s2.
     *
     * data - NUL terminated string, changed in place. When s2 is longer
     *        than s1 the buffer must have room for the longer result.
     * s1   - text to look for; an empty s1 leaves data unchanged
     * s2   - text to put in its place
     *
     * The string is scanned once from left to right and the search resumes
     * after each replacement, so text taken from s2 is never searched
     * again. This keeps the function from looping forever when s2 itself
     * contains s1.
     */
    char *p;
    size_t oldlen;
    size_t newlen;

    oldlen = strlen(s1);
    newlen = strlen(s2);

    /* An empty pattern matches everywhere; there is nothing to replace */
    if (oldlen == 0)
        return;

    p = strstr(data,s1);
    while(p)
    {
        /* Move the text after the match so that it follows the replacement */
        if (newlen != oldlen)
            memmove(p + newlen,p + oldlen,strlen(p + oldlen) + 1);

        /* Write the replacement over the match */
        memcpy(p,s2,newlen);

        /* Carry on after the replacement */
        p = strstr(p + newlen,s1);
    }
}
Switching between first and second person references in a string;
/* How the replacement text of a conversion rule is chosen */
enum
{
    FTOS_FIXED,     /* always the replacement text of the rule */
    FTOS_SENTENCE,  /* replacement is capitalised at the start of a sentence */
    FTOS_I,         /* "I": the case is taken from the word that follows */
    FTOS_YOU        /* "YOU": becomes I or ME depending on its position */
};

/* One first person <-> second person conversion */
struct ftos_rule
{
    const char *from;   /* phrase to find, without the character after it */
    const char *ends;   /* characters accepted after the phrase; NULL accepts
                           a space, any punctuation or the end of the string */
    const char *to;     /* replacement for the phrase */
    int kind;           /* one of the FTOS_ values */
};

/* The rules are tried in order, so a longer phrase must come before the
   shorter word it starts with ("I AM" before "I", "YOU ARE" before "YOU") */
static const struct ftos_rule ftos_rules[] =
{
    { "I AM",     NULL, "YOU ARE",  FTOS_FIXED },
    { "I am",     NULL, "you are",  FTOS_SENTENCE },
    { "I WAS",    " ",  "YOU WERE", FTOS_FIXED },
    { "I was",    " ",  "you were", FTOS_SENTENCE },
    { "YOU ARE",  NULL, "I AM",     FTOS_FIXED },
    { "You are",  NULL, "I am",     FTOS_FIXED },
    { "you are",  NULL, "I am",     FTOS_FIXED },
    { "YOU WERE", " ",  "I WAS",    FTOS_FIXED },
    { "You were", " ",  "I was",    FTOS_FIXED },
    { "you were", " ",  "I was",    FTOS_FIXED },
    { "I",        " ",  "you",      FTOS_I },
    { "YOU",      NULL, "ME",       FTOS_YOU },
    { "You",      NULL, "I",        FTOS_FIXED },
    { "you",      NULL, "me",       FTOS_FIXED },
    { "ME",       " .", "YOU",      FTOS_FIXED },
    { "Me",       " .", "You",      FTOS_FIXED },
    { "me",       " .", "you",      FTOS_FIXED },
    { "YOUR",     " ",  "MY",       FTOS_FIXED },
    { "Your",     " ",  "My",       FTOS_FIXED },
    { "your",     " ",  "my",       FTOS_FIXED },
    { "MY",       " ",  "YOUR",     FTOS_FIXED },
    { "My",       " ",  "Your",     FTOS_FIXED },
    { "my",       " ",  "your",     FTOS_FIXED }
};

static const struct ftos_rule *ftos_find(const char *p)
{
    /* Return the first rule whose phrase starts at p and is followed by an
       accepted character, or NULL when no rule applies */
    size_t i;
    size_t len;
    char next;

    for (i = 0; i < sizeof ftos_rules / sizeof ftos_rules[0]; i++)
    {
        len = strlen(ftos_rules[i].from);
        if (strncmp(p,ftos_rules[i].from,len) != 0)
            continue;

        /* The phrase matched, so p[len] is inside the string or is its
           terminator */
        next = p[len];
        if (ftos_rules[i].ends == NULL)
        {
            if (next == 0 || next == 32 || ispunct((unsigned char)next))
                return &ftos_rules[i];
        }
        else
        if (next != 0 && strchr(ftos_rules[i].ends,next) != NULL)
            return &ftos_rules[i];
    }
    return NULL;
}

static int ftos_you_is_subject(const char *data, const char *p)
{
    /* Decide whether the upper case YOU at p becomes I (nonzero) or ME
       (zero). It becomes I when only spaces lie between it and the start
       of data, or when the nearest non-space character before it is a
       full stop or is the very first character of data. */
    const char *q;

    /* Step back over the spaces in front of the word, never leaving data */
    q = p;
    while(q > data && q[-1] == 32)
        q--;

    if (q == data)
        return 1;

    /* q[-1] is the nearest non-space character before the word */
    q--;
    return q == data || *q == '.';
}

char *strftos(char *data)
{
    /* convert first person to second person */
    /* I ---------> YOU */
    /* ME --------> YOU */
    /* I AM ------> YOU ARE */
    /* YOUR ------> MY */
    /* MY --------> YOUR */
    /* YOU ARE ---> I AM */
    /* YOU -------> I */
    /* I WAS -----> YOU WERE */
    /* YOU WERE --> I WAS */

    /* data is converted in place and returned. NULL is returned when no
     * memory is available, in which case data is left unchanged.
     *
     * The result can be longer than the original: the buffer at data must
     * have room for 2 * strlen(data) + 2 bytes in the worst case.
     *
     * Phrases are only recognised in the three spellings listed in
     * ftos_rules (upper case, capitalised, lower case).
     */
    const struct ftos_rule *rule;
    const char *text;
    const char *q;
    char *copy;
    char *out;
    char *start;
    char *p;
    size_t len;
    int capital;

    /* Allocate space for the converted string before data is touched.
       No rule more than doubles the text it consumes ("I " becomes
       "you "), so twice the length including the space added below,
       plus the terminator, is always enough. */
    copy = malloc((strlen(data) + 1) * 2 + 1);
    if (copy == NULL)
        return NULL;
    out = copy;

    /* A trailing space guarantees that the last word is followed by a
       separator; it is removed again at the end */
    strcat(data," ");

    p = data;
    while(*p)
    {
        rule = ftos_find(p);
        if (rule == NULL)
        {
            /* Not a phrase to convert: copy the word and the separators
               after it unchanged, which steps to the next word */
            while(*p && *p != 32)
                *out++ = *p++;
            while(*p && strchr(" ,.:;?!",*p))
                *out++ = *p++;
            continue;
        }

        /* Choose the replacement text */
        text = rule->to;
        capital = 0;
        if (rule->kind == FTOS_SENTENCE)
        {
            /* Capitalised at the start of data or directly after a full stop */
            capital = (p == data || p[-1] == '.');
        }
        else
        if (rule->kind == FTOS_I)
        {
            /* Is the next word lowercase ? */
            q = p + 1;
            while(*q == 32)
                q++;
            if (islower((unsigned char)*q))
                capital = (p == data);
            else
                text = "YOU";
        }
        else
        if (rule->kind == FTOS_YOU)
        {
            if (ftos_you_is_subject(data,p))
                text = "I";
        }

        /* Write the replacement */
        start = out;
        len = strlen(text);
        memcpy(out,text,len);
        out += len;
        if (capital)
            *start = (char)toupper((unsigned char)*start);

        /* Step over the phrase and carry across the character that
           followed it. The next word is examined on the next pass, so
           neighbouring phrases such as "me your" are both converted. */
        p += strlen(rule->from);
        if (*p)
            *out++ = *p++;
    }

    /* Remove trailing space */
    if (out > copy)
        out--;
    *out = 0;

    /* Transfer copy to data */
    strcpy(data,copy);

    /* Free memory */
    free(copy);
    return data;
}
Correcting grammatical errors which may occur when converting phrases;
void strcor(char *data)
{
    /* Correct common grammatical errors left by a person conversion.
     *
     * The first "WHY YOU DO NOT" in data becomes "WHY DO YOU NOT" and the
     * first "WHY ME DO NOT" becomes "WHY DO I NOT". data is changed in
     * place and never grows.
     */
    char *p;

    /* Both phrases are 14 characters long: overwrite in place */
    p = strstr(data,"WHY YOU DO NOT");
    if (p)
        memcpy(p,"WHY DO YOU NOT",14);

    p = strstr(data,"WHY ME DO NOT");
    if (p)
    {
        /* The correction is one character shorter: close the string up
           by one, then overwrite exactly the 12 characters of the new
           phrase so that whatever followed the old one (including the
           terminator) is preserved */
        memmove(p,p + 1,strlen(p + 1) + 1);
        memcpy(p,"WHY DO I NOT",12);
    }
}
Converting a statement into a 'WHY?' question;
void strwhy(char *data)
{
    /* Turn the statement in data into a "WHY ...? " question.
     *
     * "WHY " is inserted in front of the text, strcor() tidies the word
     * order, the text is cut at the first of , . : ; ! and "? " is added
     * at the end. data is changed in place and must have room for the
     * extra characters.
     */
    char *cut;

    strins(data,"WHY ");
    strcor(data);

    /* Keep only the first part of the string */
    cut = strpbrk(data,",.:;!");
    if (cut != NULL)
        *cut = 0;

    /* No '.' or '!' can be left after the cut above, so the question
       mark always goes at the end of the remaining text */
    strins(data + strlen(data),"? ");
}
Locate a substring within a string, and return a pointer to the first word following that substring;
char *strrstr(char *s1, char *s2)
{
    /* Look for s2 inside s1 and return a pointer to the first character
       after it that is not a space. The result points at the terminator
       of s1 when nothing follows, and is NULL when s2 does not occur. */
    char *after;

    after = strstr(s1,s2);
    if (after == NULL)
        return NULL;

    /* Step over the match, then over any spaces behind it */
    for (after += strlen(s2); *after == 32; after++)
        ;

    return after;
}
A demonstration function which calls the toolbox functions;
int main(void)
{
    /* Demonstrate the toolbox: each string is converted between first and
       second person and printed on a line of its own. Returns 0. */

    /* Strings that are only passed through strftos() */
    static const char *const plain[] =
    {
        "My name is Michael Caine!",
        "My boyfriend made me come to see you.",
        "I used to think that I was ugly, but now I am not so sure",
        "I think you are a bit of a wally!",
        "MY FRIEND MADE ME COME TO SEE YOU"
    };

    /* The demonstration strings and their conversions all fit in 80 bytes */
    char data[80];
    char *p;
    size_t i;

    /* NOTE: clrscr() is not standard C; it is the screen clear that DOS
       compilers declare in conio.h */
    clrscr();

    for (i = 0; i < sizeof plain / sizeof plain[0]; i++)
    {
        strcpy(data,plain[i]);

        p = strftos(data);
        if (p == NULL)
            printf("\nUnable to change string");
        else
            printf("\n%s",data);
    }

    /* Conversion followed by a grammar correction */
    strcpy(data,"WHY I DO NOT LIKE YOU ANYMORE");

    p = strftos(data);
    if (p == NULL)
        printf("\nUnable to change string");
    else
    {
        strcor(data);
        printf("\n%s",data);
    }

    /* Conversion followed by a change into a question */
    strcpy(data,"I DO NOT LIKE YOU, ANYMORE, you big prat!");

    p = strftos(data);
    if (p == NULL)
        printf("\nUnable to change string");
    else
    {
        strwhy(data);
        printf("\n%s",data);
    }

    /* Print the text that follows a known phrase */
    strcpy(data,"WHAT IS THE MATTER WITH THE ECONOMY?");

    p = strrstr(data,"THE MATTER");
    if (p != NULL)
        printf("\n%s",p);

    return 0;
}