/* * Copyright (c) 2007 Mohammed Sameer. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* $Id$ */ #include #include #include #define IS_LAM_ALEF(x) ((x == 0x000fef7) || (x == 0x000fefb) || (x == 0x000fef5) || (x == 0x000fef9)) gunichar *to_ucs4(char *buff, glong *len); void clean_text(gunichar *text, glong len); void print_ucs4(gunichar c); int main(int argc, char *argv[]) { FILE *stream = NULL; char buff[LINE_MAX]; glong len; if (argc != 2) { stream = fdopen(0, "r"); if (!stream) { perror("fdopen"); return 1; } } else { stream = fopen(argv[1], "r"); if (!stream) { perror("fopen"); return 1; } } while (fgets(buff, LINE_MAX, stream)) { gunichar *text = to_ucs4(buff, &len); clean_text(text, len); g_free(text); } fclose(stream); return 0; } gunichar *to_ucs4(char *buff, glong *len) { GError *error = NULL; gunichar *ucs = g_utf8_to_ucs4(buff, -1, NULL, len, &error); if (!ucs) { fprintf(stderr, "%s\n", error->message); int retval = error->code; g_error_free(error); exit(retval); } return ucs; } void clean_text(gunichar *text, glong len) { glong x; for (x = 0; x < len; x++) { gunichar c = text[x]; // If it's a diacritic or a kashida, eat it. if ( (c == 0x064B) || (c == 0x064C) || (c == 0x064D) || (c == 0x064E) || (c == 0x064F) || (c == 0x0650) || (c == 0x0651) || (c == 0x0652) || (c == 0x0653) || (c == 0x0654) || (c == 0x0655) || (c == 0x0640) ) { // ignore ; } else if (IS_LAM_ALEF(c)) { print_ucs4(0x0644); switch (c) { case 0xFEF5: { print_ucs4(0x0622); break; } case 0xFEF7: { print_ucs4(0x0623); break; } case 0xFEF9: { print_ucs4(0x0625); break; } case 0xFEFB: { print_ucs4(0x0627); break; } } } else { // print it. print_ucs4(c); } } } void print_ucs4(gunichar c) { gchar buff[7]; gint len; len = g_unichar_to_utf8(c, buff); buff[len] = '\0'; printf("%s", buff); }