gnc-csv-model.c

00001 #include "gnc-csv-model.h"
00002 
00003 #include "gnc-book.h"
00004 
00005 #include <glib/gi18n.h>
00006 #include <goffice/utils/go-glib-extras.h>
00007 
00008 #include <string.h>
00009 #include <sys/time.h>
00010 
00011 #include <sys/types.h>
00012 #include <sys/stat.h>
00013 #include <regex.h>
00014 #include <unistd.h>
00015 #include <fcntl.h>
00016 #include <stdlib.h>
00017 #include <time.h>
00018 #ifndef HAVE_LOCALTIME_R
00019 #include "localtime_r.h"
00020 #endif
00021 
00022 static QofLogModule log_module = GNC_MOD_IMPORT;
00023 
00024 const int num_date_formats = 5;
00025 
00026 const gchar* date_format_user[] = {N_("y-m-d"),
00027                                    N_("d-m-y"),
00028                                    N_("m-d-y"),
00029                                    N_("d-m"),
00030                                    N_("m-d")};
00031 
00032 /* This array contains all of the different strings for different column types. */
00033 gchar* gnc_csv_column_type_strs[GNC_CSV_NUM_COL_TYPES] = {N_("None"),
00034                                                           N_("Date"),
00035                                                           N_("Description"),
00036                                                           N_("Balance"),
00037                                                           N_("Deposit"),
00038                                                           N_("Withdrawal"),
00039                                                           N_("Num")};
00040 
00044 static StfParseOptions_t* default_parse_options(void)
00045 {
00046   StfParseOptions_t* options = stf_parse_options_new();
00047   stf_parse_options_set_type(options, PARSE_TYPE_CSV);
00048   stf_parse_options_csv_set_separators(options, ",", NULL);
00049   return options;
00050 }
00051 
00059 static time_t parse_date_with_year(const char* date_str, int format)
00060 {
00061   time_t rawtime; /* The integer time */
00062   struct tm retvalue, test_retvalue; /* The time in a broken-down structure */
00063   
00064   int i, j, mem_length, orig_year = -1, orig_month = -1, orig_day = -1;
00065 
00066   /* Buffer for containing individual parts (e.g. year, month, day) of a date */
00067   char date_segment[5];
00068 
00069   /* The compiled regular expression */
00070   regex_t preg = {0};
00071 
00072   /* An array containing indices specifying the matched substrings in date_str */
00073   regmatch_t pmatch[4] = { {0}, {0}, {0}, {0} };
00074 
00075   /* The regular expression for parsing dates */
00076   const char* regex = "^ *([0-9]+) *[-/.'] *([0-9]+) *[-/.'] *([0-9]+).*$|^ *([0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]).*$";
00077 
00078   /* We get our matches using the regular expression. */
00079   regcomp(&preg, regex, REG_EXTENDED);
00080   regexec(&preg, date_str, 4, pmatch, 0);
00081   regfree(&preg);
00082 
00083   /* If there wasn't a match, there was an error. */
00084   if(pmatch[0].rm_eo == 0)
00085     return -1;
00086 
00087   /* If this is a string without separators ... */
00088   if(pmatch[1].rm_so == -1)
00089   {
00090     /* ... we will fill in the indices based on the user's selection. */
00091     int k = 0; /* k traverses date_str by keeping track of where separators "should" be. */
00092     j = 1; /* j traverses pmatch. */
00093     for(i = 0; date_format_user[format][i]; i++)
00094     {
00095       char segment_type = date_format_user[format][i];
00096       /* Only do something if this is a meaningful character */
00097       if(segment_type == 'y' || segment_type == 'm' || segment_type == 'd')
00098       {
00099         pmatch[j].rm_so = k;
00100         switch(segment_type)
00101         {
00102         case 'm':
00103         case 'd':
00104           k += 2;
00105           break;
00106 
00107         case 'y':
00108           k += 4;
00109           break;
00110         }
00111 
00112         pmatch[j].rm_eo = k;
00113         j++;
00114       }
00115     }    
00116   }
00117 
00118   /* Put some sane values in retvalue by using the current time for
00119    * the non-year-month-day parts of the date. */
00120   time(&rawtime);
00121   localtime_r(&rawtime, &retvalue);
00122 
00123   /* j traverses pmatch (index 0 contains the entire string, so we
00124    * start at index 1 for the first meaningful match). */
00125   j = 1;
00126   /* Go through the date format and interpret the matches in order of
00127    * the sections in the date format. */
00128   for(i = 0; date_format_user[format][i]; i++)
00129   {
00130     char segment_type = date_format_user[format][i];
00131     /* Only do something if this is a meaningful character */
00132     if(segment_type == 'y' || segment_type == 'm' || segment_type == 'd')
00133     {
00134       /* Copy the matching substring into date_segment so that we can
00135        * convert it into an integer. */
00136       mem_length = pmatch[j].rm_eo - pmatch[j].rm_so;
00137       memcpy(date_segment, date_str + pmatch[j].rm_so, mem_length);
00138       date_segment[mem_length] = '\0';
00139 
00140       /* Set the appropriate member of retvalue. Save the original
00141        * values so that we can check if the change when we use mktime
00142        * below. */
00143       switch(segment_type)
00144       {
00145       case 'y':
00146         retvalue.tm_year = atoi(date_segment);
00147 
00148         /* Handle two-digit years. */
00149         if(retvalue.tm_year < 100)
00150         {
00151           /* We allow two-digit years in the range 1969 - 2068. */
00152           if(retvalue.tm_year < 69)
00153             retvalue.tm_year += 100;
00154         }
00155         else
00156           retvalue.tm_year -= 1900;
00157         orig_year = retvalue.tm_year;
00158         break;
00159         
00160       case 'm':
00161         orig_month = retvalue.tm_mon = atoi(date_segment) - 1;
00162         break;
00163         
00164       case 'd':
00165         orig_day = retvalue.tm_mday = atoi(date_segment);
00166         break;
00167       }
00168       j++;
00169     }
00170   }
00171   /* Convert back to an integer. If mktime leaves retvalue unchanged,
00172    * everything is okay; otherwise, an error has occurred. */
00173   /* We have to use a "test" date value to account for changes in
00174    * daylight savings time, which can cause a date change with mktime
00175    * near midnight, causing the code to incorrectly think a date is
00176    * incorrect. */
00177   test_retvalue = retvalue;
00178   mktime(&test_retvalue);
00179   retvalue.tm_isdst = test_retvalue.tm_isdst;
00180   rawtime = mktime(&retvalue);
00181   if(retvalue.tm_mday == orig_day &&
00182      retvalue.tm_mon == orig_month &&
00183      retvalue.tm_year == orig_year)
00184   {
00185     return rawtime;
00186   }
00187   else
00188   {
00189     return -1;
00190   }
00191 }
00192 
00200 static time_t parse_date_without_year(const char* date_str, int format)
00201 {
00202   time_t rawtime; /* The integer time */
00203   struct tm retvalue, test_retvalue; /* The time in a broken-down structure */
00204   
00205   int i, j, mem_length, orig_year = -1, orig_month = -1, orig_day = -1;
00206 
00207   /* Buffer for containing individual parts (e.g. year, month, day) of a date */
00208   gchar* date_segment;
00209 
00210   /* The compiled regular expression */
00211   regex_t preg = {0};
00212 
00213   /* An array containing indices specifying the matched substrings in date_str */
00214   regmatch_t pmatch[3] = { {0}, {0}, {0} };
00215 
00216   /* The regular expression for parsing dates */
00217   const char* regex = "^ *([0-9]+) *[-/.'] *([0-9]+).*$";
00218 
00219   /* We get our matches using the regular expression. */
00220   regcomp(&preg, regex, REG_EXTENDED);
00221   regexec(&preg, date_str, 3, pmatch, 0);
00222   regfree(&preg);
00223 
00224   /* If there wasn't a match, there was an error. */
00225   if(pmatch[0].rm_eo == 0)
00226     return -1;
00227 
00228   /* Put some sane values in retvalue by using the current time for
00229    * the non-year-month-day parts of the date. */
00230   time(&rawtime);
00231   localtime_r(&rawtime, &retvalue);
00232   orig_year = retvalue.tm_year;
00233 
00234   /* j traverses pmatch (index 0 contains the entire string, so we
00235    * start at index 1 for the first meaningful match). */
00236   j = 1;
00237   /* Go through the date format and interpret the matches in order of
00238    * the sections in the date format. */
00239   for(i = 0; date_format_user[format][i]; i++)
00240   {
00241     char segment_type = date_format_user[format][i];
00242     /* Only do something if this is a meaningful character */
00243     if(segment_type == 'm' || segment_type == 'd')
00244     {
00245       /* Copy the matching substring into date_segment so that we can
00246        * convert it into an integer. */
00247       mem_length = pmatch[j].rm_eo - pmatch[j].rm_so;
00248       date_segment = g_new(gchar, mem_length);
00249       memcpy(date_segment, date_str + pmatch[j].rm_so, mem_length);
00250       date_segment[mem_length] = '\0';
00251 
00252       /* Set the appropriate member of retvalue. Save the original
00253        * values so that we can check if the change when we use mktime
00254        * below. */
00255       switch(segment_type)
00256       {
00257       case 'm':
00258         orig_month = retvalue.tm_mon = atoi(date_segment) - 1;
00259         break;
00260         
00261       case 'd':
00262         orig_day = retvalue.tm_mday = atoi(date_segment);
00263         break;
00264       }
00265       g_free(date_segment);
00266       j++;
00267     }
00268   }
00269   /* Convert back to an integer. If mktime leaves retvalue unchanged,
00270    * everything is okay; otherwise, an error has occurred. */
00271   /* We have to use a "test" date value to account for changes in
00272    * daylight savings time, which can cause a date change with mktime
00273    * near midnight, causing the code to incorrectly think a date is
00274    * incorrect. */
00275   test_retvalue = retvalue;
00276   mktime(&test_retvalue);
00277   retvalue.tm_isdst = test_retvalue.tm_isdst;
00278   rawtime = mktime(&retvalue);
00279   if(retvalue.tm_mday == orig_day &&
00280      retvalue.tm_mon == orig_month &&
00281      retvalue.tm_year == orig_year)
00282   {
00283     return rawtime;
00284   }
00285   else
00286   {
00287     return -1;
00288   }
00289 }
00290 
00299 static time_t parse_date(const char* date_str, int format)
00300 {
00301   if(strchr(date_format_user[format], 'y'))
00302     return parse_date_with_year(date_str, format);
00303   else
00304     return parse_date_without_year(date_str, format);
00305 }
00306 
00310 GncCsvParseData* gnc_csv_new_parse_data(void)
00311 {
00312   GncCsvParseData* parse_data = g_new(GncCsvParseData, 1);
00313   parse_data->encoding = "UTF-8";
00314   /* All of the data pointers are initially NULL. This is so that, if
00315    * gnc_csv_parse_data_free is called before all of the data is
00316    * initialized, only the data that needs to be freed is freed. */
00317   parse_data->raw_str.begin = parse_data->raw_str.end
00318     = parse_data->file_str.begin = parse_data->file_str.end = NULL;
00319   parse_data->orig_lines = NULL;
00320   parse_data->orig_row_lengths = NULL;
00321   parse_data->column_types = NULL;
00322   parse_data->error_lines = parse_data->transactions = NULL;
00323   parse_data->options = default_parse_options();
00324   parse_data->date_format = -1;
00325   parse_data->chunk = g_string_chunk_new(100 * 1024);
00326   return parse_data;
00327 }
00328 
00332 void gnc_csv_parse_data_free(GncCsvParseData* parse_data)
00333 {
00334   /* All non-NULL pointers have been initialized and must be freed. */
00335 
00336   if(parse_data->raw_mapping != NULL)
00337     g_mapped_file_free(parse_data->raw_mapping);
00338 
00339   if(parse_data->file_str.begin != NULL)
00340     g_free(parse_data->file_str.begin);
00341 
00342   if(parse_data->orig_lines != NULL)
00343     stf_parse_general_free(parse_data->orig_lines);
00344 
00345   if(parse_data->orig_row_lengths != NULL)
00346     g_array_free(parse_data->orig_row_lengths, FALSE);
00347 
00348   if(parse_data->options != NULL)
00349     stf_parse_options_free(parse_data->options);
00350 
00351   if(parse_data->column_types != NULL)
00352     g_array_free(parse_data->column_types, TRUE);
00353 
00354   if(parse_data->error_lines != NULL)
00355     g_list_free(parse_data->error_lines);
00356 
00357   if(parse_data->transactions != NULL)
00358   {
00359     GList* transactions = parse_data->transactions;
00360     /* We have to free the GncCsvTransLine's that are at each node in
00361      * the list before freeing the entire list. */
00362     do
00363     {
00364       g_free(transactions->data);
00365       transactions = g_list_next(transactions);
00366     } while(transactions != NULL);
00367     g_list_free(parse_data->transactions);
00368   }
00369 
00370   g_free(parse_data->chunk);
00371   g_free(parse_data);
00372 }
00373 
00382 int gnc_csv_convert_encoding(GncCsvParseData* parse_data, const char* encoding,
00383                              GError** error)
00384 {
00385   gsize bytes_read, bytes_written;
00386 
00387   /* If parse_data->file_str has already been initialized it must be
00388    * freed first. (This should always be the case, since
00389    * gnc_csv_load_file should always be called before this
00390    * function.) */
00391   if(parse_data->file_str.begin != NULL)
00392     g_free(parse_data->file_str.begin);
00393 
00394   /* Do the actual translation to UTF-8. */
00395   parse_data->file_str.begin = g_convert(parse_data->raw_str.begin,
00396                                          parse_data->raw_str.end - parse_data->raw_str.begin,
00397                                          "UTF-8", encoding, &bytes_read, &bytes_written,
00398                                          error);
00399   /* Handle errors that occur. */
00400   if(parse_data->file_str.begin == NULL)
00401     return 1;
00402 
00403   /* On success, save the ending pointer of the translated data and
00404    * the encoding type and return 0. */
00405   parse_data->file_str.end = parse_data->file_str.begin + bytes_written;
00406   parse_data->encoding = (gchar*)encoding;
00407   return 0;
00408 }
00409 
00422 int gnc_csv_load_file(GncCsvParseData* parse_data, const char* filename,
00423                       GError** error)
00424 {
00425   const char* guess_enc;
00426 
00427   /* Get the raw data first and handle an error if one occurs. */
00428   parse_data->raw_mapping = g_mapped_file_new(filename, FALSE, error);
00429   if(parse_data->raw_mapping == NULL)
00430   {
00431     /* TODO Handle file opening errors more specifically,
00432      * e.g. inexistent file versus no read permission. */
00433     parse_data->raw_str.begin = NULL;
00434     g_set_error(error, 0, GNC_CSV_FILE_OPEN_ERR, _("File opening failed."));
00435     return 1;
00436   }
00437 
00438   /* Copy the mapping's contents into parse-data->raw_str. */
00439   parse_data->raw_str.begin = g_mapped_file_get_contents(parse_data->raw_mapping);
00440   parse_data->raw_str.end = parse_data->raw_str.begin + g_mapped_file_get_length(parse_data->raw_mapping);
00441 
00442   /* Make a guess at the encoding of the data. */
00443   guess_enc = go_guess_encoding((const char*)(parse_data->raw_str.begin),
00444                                 (size_t)(parse_data->raw_str.end - parse_data->raw_str.begin),
00445                                 "UTF-8", NULL);
00446   if(guess_enc == NULL)
00447   {
00448     g_set_error(error, 0, GNC_CSV_ENCODING_ERR, _("Unknown encoding."));
00449     return 1;
00450   }
00451 
00452   /* Convert using the guessed encoding into parse_data->file_str and
00453    * handle any errors that occur. */
00454   gnc_csv_convert_encoding(parse_data, guess_enc, error);
00455   if(parse_data->file_str.begin == NULL)
00456   {
00457     g_set_error(error, 0, GNC_CSV_ENCODING_ERR, _("Unknown encoding."));
00458     return 1;
00459   }
00460   else
00461     return 0;
00462 }
00463 
00476 int gnc_csv_parse(GncCsvParseData* parse_data, gboolean guessColTypes, GError** error)
00477 {
00478   /* max_cols is the number of columns in the row with the most columns. */
00479   int i, max_cols = 0;
00480 
00481   if(parse_data->orig_lines != NULL)
00482   {
00483     stf_parse_general_free(parse_data->orig_lines);
00484   }
00485 
00486   /* If everything is fine ... */
00487   if(parse_data->file_str.begin != NULL)
00488   {
00489     /* Do the actual parsing. */
00490     parse_data->orig_lines = stf_parse_general(parse_data->options, parse_data->chunk,
00491                                                parse_data->file_str.begin,
00492                                                parse_data->file_str.end);
00493   }
00494   /* If we couldn't get the encoding right, we just want an empty array. */
00495   else
00496   {
00497     parse_data->orig_lines = g_ptr_array_new();
00498   }
00499 
00500   /* Record the original row lengths of parse_data->orig_lines. */
00501   if(parse_data->orig_row_lengths != NULL)
00502     g_array_free(parse_data->orig_row_lengths, FALSE);
00503 
00504   parse_data->orig_row_lengths =
00505     g_array_sized_new(FALSE, FALSE, sizeof(int), parse_data->orig_lines->len);
00506   g_array_set_size(parse_data->orig_row_lengths, parse_data->orig_lines->len);
00507   parse_data->orig_max_row = 0;
00508   for(i = 0; i < parse_data->orig_lines->len; i++)
00509   {
00510     int length = ((GPtrArray*)parse_data->orig_lines->pdata[i])->len;
00511     parse_data->orig_row_lengths->data[i] = length;
00512     if(length > parse_data->orig_max_row)
00513       parse_data->orig_max_row = length;
00514   }
00515 
00516   /* If it failed, generate an error. */
00517   if(parse_data->orig_lines == NULL)
00518   {
00519     g_set_error(error, 0, 0, "Parsing failed.");
00520     return 1;
00521   }
00522 
00523   /* Now that we have data, let's set max_cols. */
00524   for(i = 0; i < parse_data->orig_lines->len; i++)
00525   {
00526     if(max_cols < ((GPtrArray*)(parse_data->orig_lines->pdata[i]))->len)
00527       max_cols = ((GPtrArray*)(parse_data->orig_lines->pdata[i]))->len;
00528   }
00529 
00530   if(guessColTypes)
00531   {
00532     /* Free parse_data->column_types if it's already been created. */
00533     if(parse_data->column_types != NULL)
00534       g_array_free(parse_data->column_types, TRUE);
00535 
00536     /* Create parse_data->column_types and fill it with guesses based
00537      * on the contents of each column. */
00538     parse_data->column_types = g_array_sized_new(FALSE, FALSE, sizeof(int),
00539                                                  max_cols);
00540     g_array_set_size(parse_data->column_types, max_cols);
00541     /* TODO Make it actually guess. */
00542     for(i = 0; i < parse_data->column_types->len; i++)
00543     {
00544       parse_data->column_types->data[i] = GNC_CSV_NONE;
00545     }
00546   }
00547   else
00548   {
00549     /* If we don't need to guess column types, we will simply set any
00550      * new columns that are created that didn't exist before to "None"
00551      * since we don't want gibberish to appear. Note:
00552      * parse_data->column_types should have already been
00553      * initialized, so we don't check for it being NULL. */
00554     int i = parse_data->column_types->len;
00555     g_array_set_size(parse_data->column_types, max_cols);
00556     for(; i < parse_data->column_types->len; i++)
00557     {
00558       parse_data->column_types->data[i] = GNC_CSV_NONE;
00559     }
00560   }
00561 
00562   return 0;
00563 }
00564 
00566 typedef struct
00567 {
00568   int date_format; 
00569   Account* account; 
00570   GList* properties; 
00571 } TransPropertyList;
00572 
00574 typedef struct
00575 {
00576   int type; 
00578   void* value; 
00579   TransPropertyList* list; 
00580 } TransProperty;
00581 
00585 static TransProperty* trans_property_new(int type, TransPropertyList* list)
00586 {
00587   TransProperty* prop = g_new(TransProperty, 1);
00588   prop->type = type;
00589   prop->list = list;
00590   prop->value = NULL;
00591   return prop;
00592 }
00593 
00597 static void trans_property_free(TransProperty* prop)
00598 {
00599   switch(prop->type)
00600   {
00601     /* The types for "Date" and "Balance" (time_t and gnc_numeric,
00602      * respectively) are typically not pointed to, we have to free
00603      * them, unlike types like char* ("Description"). */
00604   case GNC_CSV_DATE:
00605   case GNC_CSV_BALANCE:
00606   case GNC_CSV_DEPOSIT:
00607   case GNC_CSV_WITHDRAWAL:
00608     if(prop->value != NULL)
00609       g_free(prop->value);
00610     break;
00611   }
00612   g_free(prop);
00613 }
00614 
00622 static gboolean trans_property_set(TransProperty* prop, char* str)
00623 {
00624   char *endptr, *possible_currency_symbol, *str_dupe;
00625   double value;
00626   switch(prop->type)
00627   {
00628   case GNC_CSV_DATE:
00629     prop->value = g_new(time_t, 1);
00630     *((time_t*)(prop->value)) = parse_date(str, prop->list->date_format);
00631     return *((time_t*)(prop->value)) != -1;
00632 
00633   case GNC_CSV_DESCRIPTION:
00634   case GNC_CSV_NUM:
00635     prop->value = g_strdup(str);
00636     return TRUE;
00637 
00638   case GNC_CSV_BALANCE:
00639   case GNC_CSV_DEPOSIT:
00640   case GNC_CSV_WITHDRAWAL:
00641     str_dupe = g_strdup(str); /* First, we make a copy so we can't mess up real data. */
00642 
00643     /* Go through str_dupe looking for currency symbols. */
00644     for(possible_currency_symbol = str_dupe; *possible_currency_symbol;
00645         possible_currency_symbol = g_utf8_next_char(possible_currency_symbol))
00646     {
00647       if(g_unichar_type(g_utf8_get_char(possible_currency_symbol)) == G_UNICODE_CURRENCY_SYMBOL)
00648       {
00649         /* If we find a currency symbol, save the position just ahead
00650          * of the currency symbol (next_symbol), and find the null
00651          * terminator of the string (last_symbol). */
00652         char *next_symbol = g_utf8_next_char(possible_currency_symbol), *last_symbol = next_symbol;
00653         while(*last_symbol)
00654           last_symbol = g_utf8_next_char(last_symbol);
00655 
00656         /* Move all of the string (including the null byte, which is
00657          * why we have +1 in the size parameter) following the
00658          * currency symbol back one character, thereby overwriting the
00659          * currency symbol. */
00660         memmove(possible_currency_symbol, next_symbol, last_symbol - next_symbol + 1);
00661         break;
00662       }
00663     }
00664 
00665     /* Translate the string (now clean of currency symbols) into a number. */
00666     value = strtod(str_dupe, &endptr);
00667 
00668     /* If this isn't a valid numeric string, this is an error. */
00669     if(endptr != str_dupe + strlen(str_dupe))
00670     {
00671       g_free(str_dupe);
00672       return FALSE;
00673     }
00674 
00675     g_free(str_dupe);
00676 
00677     if(abs(value) > 0.00001)
00678     {
00679       prop->value = g_new(gnc_numeric, 1);
00680       *((gnc_numeric*)(prop->value)) =
00681         double_to_gnc_numeric(value, xaccAccountGetCommoditySCU(prop->list->account),
00682                               GNC_RND_ROUND);
00683     }
00684     return TRUE;
00685   }
00686   return FALSE; /* We should never actually get here. */
00687 }
00688 
00694 static TransPropertyList* trans_property_list_new(Account* account, int date_format)
00695 {
00696   TransPropertyList* list = g_new(TransPropertyList, 1);
00697   list->account = account;
00698   list->date_format = date_format;
00699   list->properties = NULL;
00700   return list;
00701 }
00702 
00706 static void trans_property_list_free(TransPropertyList* list)
00707 {
00708   /* Free all of the properties in this list before freeeing the list itself. */
00709   GList* properties_begin = list->properties;
00710   while(list->properties != NULL)
00711   {
00712     trans_property_free((TransProperty*)(list->properties->data));
00713     list->properties = g_list_next(list->properties);
00714   }
00715   g_list_free(properties_begin);
00716   g_free(list);
00717 }
00718 
00724 static void trans_property_list_add(TransProperty* property)
00725 {
00726   property->list->properties = g_list_append(property->list->properties, property);
00727 }
00728 
00735 static void trans_add_split(Transaction* trans, Account* account, GNCBook* book,
00736                             gnc_numeric amount)
00737 {
00738   Split* split = xaccMallocSplit(book);
00739   xaccSplitSetAccount(split, account);
00740   xaccSplitSetParent(split, trans);
00741   xaccSplitSetAmount(split, amount);
00742   xaccSplitSetValue(split, amount);
00743   xaccSplitSetAction(split, "Deposit");
00744 }
00745 
00753 static gboolean trans_property_list_verify_essentials(TransPropertyList* list, gchar** error)
00754 {
00755   int i;
00756   /* possible_errors lists the ways in which a list can fail this test. */
00757   enum PossibleErrorTypes {NO_DATE, NO_AMOUNT, NUM_OF_POSSIBLE_ERRORS};
00758   gchar* possible_errors[NUM_OF_POSSIBLE_ERRORS] = {N_("No date column."),
00759                                                     N_("No balance, deposit, or withdrawal column.")};
00760   int possible_error_lengths[NUM_OF_POSSIBLE_ERRORS] = {0};
00761   GList *properties_begin = list->properties, *errors_list = NULL;
00762 
00763   /* Go through each of the properties and erase possible errors. */
00764   while(list->properties)
00765   {
00766     switch(((TransProperty*)(list->properties->data))->type)
00767     {
00768     case GNC_CSV_DATE:
00769       possible_errors[NO_DATE] = NULL;
00770       break;
00771 
00772     case GNC_CSV_BALANCE:
00773     case GNC_CSV_DEPOSIT:
00774     case GNC_CSV_WITHDRAWAL:
00775       possible_errors[NO_AMOUNT] = NULL;
00776       break;
00777     }
00778     list->properties = g_list_next(list->properties);
00779   }
00780   list->properties = properties_begin;
00781 
00782   /* Accumulate a list of the actual errors. */
00783   for(i = 0; i < NUM_OF_POSSIBLE_ERRORS; i++)
00784   {
00785     if(possible_errors[i] != NULL)
00786     {
00787       errors_list = g_list_append(errors_list, GINT_TO_POINTER(i));
00788       /* Since we added an error, we want to also store its length for
00789        * when we construct the full error string. */
00790       possible_error_lengths[i] = strlen(_(possible_errors[i]));
00791     }
00792   }
00793 
00794   /* If there are no errors, we can quit now. */
00795   if(errors_list == NULL)
00796     return TRUE;
00797   else
00798   {
00799     /* full_error_size is the full length of the error message. */
00800     int full_error_size = 0, string_length = 0;
00801     GList* errors_list_begin = errors_list;
00802     gchar *error_message, *error_message_begin;
00803 
00804     /* Find the value for full_error_size. */
00805     while(errors_list)
00806     {
00807       /* We add an extra 1 to account for spaces in between messages. */
00808       full_error_size += possible_error_lengths[GPOINTER_TO_INT(errors_list->data)] + 1;
00809       errors_list = g_list_next(errors_list);
00810     }
00811     errors_list = errors_list_begin;
00812 
00813     /* Append the error messages one after another. */
00814     error_message = error_message_begin = g_new(gchar, full_error_size);
00815     while(errors_list)
00816     {
00817       i = GPOINTER_TO_INT(errors_list->data);
00818       string_length = possible_error_lengths[i];
00819       
00820       /* Copy the error message and put a space after it. */
00821       strncpy(error_message, _(possible_errors[i]), string_length);
00822       error_message += string_length;
00823       *error_message = ' ';
00824       error_message++;
00825       
00826       errors_list = g_list_next(errors_list);
00827     }
00828     *error_message = '\0'; /* Replace the last space with the null byte. */
00829     g_list_free(errors_list_begin);
00830 
00831     *error = error_message_begin;
00832     return FALSE;
00833   }
00834 }
00835 
00841 static GncCsvTransLine* trans_property_list_to_trans(TransPropertyList* list, gchar** error)
00842 {
00843   GncCsvTransLine* trans_line = g_new(GncCsvTransLine, 1);
00844   GList* properties_begin = list->properties;
00845   GNCBook* book = gnc_account_get_book(list->account);
00846   gnc_commodity* currency = xaccAccountGetCommodity(list->account);
00847   gnc_numeric amount = double_to_gnc_numeric(0.0, xaccAccountGetCommoditySCU(list->account),
00848                                              GNC_RND_ROUND);
00849 
00850   /* This flag is set to TRUE if we can use the "Deposit" or "Withdrawal" column. */
00851   gboolean amount_set = FALSE;
00852 
00853   /* The balance is 0 by default. */
00854   trans_line->balance_set = FALSE;
00855   trans_line->balance = amount;
00856 
00857   /* We make the line_no -1 just to mark that it hasn't been set. We
00858    * may get rid of line_no soon anyway, so it's not particularly
00859    * important. */
00860   trans_line->line_no = -1;
00861 
00862   /* Make sure this is a transaction with all the columns we need. */
00863   if(!trans_property_list_verify_essentials(list, error))
00864   {
00865     g_free(trans_line);
00866     return NULL;
00867   }
00868 
00869   trans_line->trans = xaccMallocTransaction(book);
00870   xaccTransBeginEdit(trans_line->trans);
00871   xaccTransSetCurrency(trans_line->trans, currency);
00872 
00873   /* Go through each of the properties and edit the transaction accordingly. */
00874   list->properties = properties_begin;
00875   while(list->properties != NULL)
00876   {
00877     TransProperty* prop = (TransProperty*)(list->properties->data);
00878     switch(prop->type)
00879     {
00880     case GNC_CSV_DATE:
00881       xaccTransSetDatePostedSecs(trans_line->trans, *((time_t*)(prop->value)));
00882       break;
00883 
00884     case GNC_CSV_DESCRIPTION:
00885       xaccTransSetDescription(trans_line->trans, (char*)(prop->value));
00886       break;
00887 
00888     case GNC_CSV_NUM:
00889       xaccTransSetNum(trans_line->trans, (char*)(prop->value));
00890       break;
00891 
00892     case GNC_CSV_DEPOSIT: /* Add deposits to the existing amount. */
00893       if(prop->value != NULL)
00894       {
00895         amount = gnc_numeric_add(*((gnc_numeric*)(prop->value)),
00896                                  amount,
00897                                  xaccAccountGetCommoditySCU(list->account),
00898                                  GNC_RND_ROUND);
00899         amount_set = TRUE;
00900         /* We will use the "Deposit" and "Withdrawal" columns in preference to "Balance". */
00901         trans_line->balance_set = FALSE;
00902       }
00903       break;
00904 
00905     case GNC_CSV_WITHDRAWAL: /* Withdrawals are just negative deposits. */
00906       if(prop->value != NULL)
00907       {
00908         amount = gnc_numeric_add(gnc_numeric_neg(*((gnc_numeric*)(prop->value))),
00909                                  amount,
00910                                  xaccAccountGetCommoditySCU(list->account),
00911                                  GNC_RND_ROUND);
00912         amount_set = TRUE;
00913         /* We will use the "Deposit" and "Withdrawal" columns in preference to "Balance". */
00914         trans_line->balance_set = FALSE;
00915       }
00916       break;
00917 
00918     case GNC_CSV_BALANCE: /* The balance gets stored in a separate field in trans_line. */
00919       /* We will use the "Deposit" and "Withdrawal" columns in preference to "Balance". */
00920       if(!amount_set && prop->value != NULL)
00921       {
00922         /* This gets put into the actual transaction at the end of gnc_csv_parse_to_trans. */
00923         trans_line->balance = *((gnc_numeric*)(prop->value));
00924         trans_line->balance_set = TRUE;
00925       }
00926       break;
00927     }
00928     list->properties = g_list_next(list->properties);
00929   }
00930 
00931   /* Add a split with the cumulative amount value. */
00932   trans_add_split(trans_line->trans, list->account, book, amount);
00933 
00934   return trans_line;
00935 }
00936 
00947 int gnc_csv_parse_to_trans(GncCsvParseData* parse_data, Account* account,
00948                            gboolean redo_errors)
00949 {
00950   gboolean hasBalanceColumn;
00951   int i, j, max_cols = 0;
00952   GArray* column_types = parse_data->column_types;
00953   GList *error_lines = NULL, *begin_error_lines = NULL;
00954 
00955   /* last_transaction points to the last element in
00956    * parse_data->transactions, or NULL if it's empty. */
00957   GList* last_transaction = NULL;
00958 
00959   /* Free parse_data->error_lines and parse_data->transactions if they
00960    * already exist. */
00961   if(redo_errors) /* If we're redoing errors, we save freeing until the end. */
00962   {
00963     begin_error_lines = error_lines = parse_data->error_lines;
00964   }
00965   else
00966   {
00967     if(parse_data->error_lines != NULL)
00968     {
00969       g_list_free(parse_data->error_lines);
00970     }
00971     if(parse_data->transactions != NULL)
00972     {
00973       g_list_free(parse_data->transactions);
00974     }
00975   }
00976   parse_data->error_lines = NULL;
00977 
00978   if(redo_errors) /* If we're looking only at error data ... */
00979   {
00980     if(parse_data->transactions == NULL)
00981     {
00982       last_transaction = NULL;
00983     }
00984     else
00985     {
00986       /* Move last_transaction to the end. */
00987       last_transaction = parse_data->transactions;
00988       while(g_list_next(last_transaction) != NULL)
00989       {
00990         last_transaction = g_list_next(last_transaction);
00991       }
00992     }
00993     /* ... we use only the lines in error_lines. */
00994     if(error_lines == NULL)
00995       i = parse_data->orig_lines->len; /* Don't go into the for loop. */
00996     else
00997       i = GPOINTER_TO_INT(error_lines->data);
00998   }
00999   else /* Otherwise, we look at all the data. */
01000   {
01001     /* The following while-loop effectively behaves like the following for-loop:
01002      * for(i = 0; i < parse_data->orig_lines->len; i++). */
01003     i = 0;
01004     last_transaction = NULL;
01005   }
01006   while(i < parse_data->orig_lines->len)
01007   {
01008     GPtrArray* line = parse_data->orig_lines->pdata[i];
01009     /* This flag is TRUE if there are any errors in this row. */
01010     gboolean errors = FALSE;
01011     gchar* error_message = NULL;
01012     TransPropertyList* list = trans_property_list_new(account, parse_data->date_format);
01013     GncCsvTransLine* trans_line = NULL;
01014 
01015     for(j = 0; j < line->len; j++)
01016     {
01017       /* We do nothing in "None" columns. */
01018       if(column_types->data[j] != GNC_CSV_NONE)
01019       {
01020         /* Affect the transaction appropriately. */
01021         TransProperty* property = trans_property_new(column_types->data[j], list);
01022         gboolean succeeded = trans_property_set(property, line->pdata[j]);
01023         /* TODO Maybe move error handling to within TransPropertyList functions? */
01024         if(succeeded)
01025         {
01026           trans_property_list_add(property);
01027         }
01028         else
01029         {
01030           errors = TRUE;
01031           error_message = g_strdup_printf(_("%s column could not be understood."),
01032                                           _(gnc_csv_column_type_strs[property->type]));
01033           trans_property_free(property);
01034           break;
01035         }
01036       }
01037     }
01038 
01039     /* If we had success, add the transaction to parse_data->transaction. */
01040     if(!errors)
01041     {
01042       trans_line = trans_property_list_to_trans(list, &error_message);
01043       errors = trans_line == NULL;
01044     }
01045 
01046     trans_property_list_free(list);
01047 
01048     /* If there were errors, add this line to parse_data->error_lines. */
01049     if(errors)
01050     {
01051       parse_data->error_lines = g_list_append(parse_data->error_lines,
01052                                               GINT_TO_POINTER(i));
01053       /* If there's already an error message, we need to replace it. */
01054       if(line->len > (int)(parse_data->orig_row_lengths->data[i]))
01055       {
01056         g_free(line->pdata[line->len - 1]);
01057         line->pdata[line->len - 1] = error_message;
01058       }
01059       else
01060       {
01061         /* Put the error message at the end of the line. */
01062         g_ptr_array_add(line, error_message);
01063       }
01064     }
01065     else
01066     {
01067       /* If all went well, add this transaction to the list. */
01068       trans_line->line_no = i;
01069 
01070       /* We keep the transactions sorted by date. We start at the end
01071        * of the list and go backward, simply because the file itself
01072        * is probably also sorted by date (but we need to handle the
01073        * exception anyway). */
01074 
01075       /* If we can just put it at the end, do so and increment last_transaction. */
01076       if(last_transaction == NULL ||
01077          xaccTransGetDate(((GncCsvTransLine*)(last_transaction->data))->trans) <= xaccTransGetDate(trans_line->trans))
01078       {
01079         parse_data->transactions = g_list_append(parse_data->transactions, trans_line);
01080         /* If this is the first transaction, we need to get last_transaction on track. */
01081         if(last_transaction == NULL)
01082           last_transaction = parse_data->transactions;
01083         else /* Otherwise, we can just continue. */
01084           last_transaction = g_list_next(last_transaction);
01085       }
01086       /* Otherwise, search backward for the correct spot. */
01087       else
01088       {
01089         GList* insertion_spot = last_transaction;
01090         while(insertion_spot != NULL &&
01091               xaccTransGetDate(((GncCsvTransLine*)(insertion_spot->data))->trans) > xaccTransGetDate(trans_line->trans))
01092         {
01093           insertion_spot = g_list_previous(insertion_spot);
01094         }
01095         /* Move insertion_spot one location forward since we have to
01096          * use the g_list_insert_before function. */
01097         if(insertion_spot == NULL) /* We need to handle the case of inserting at the beginning of the list. */
01098           insertion_spot = parse_data->transactions;
01099         else
01100           insertion_spot = g_list_next(insertion_spot);
01101         
01102         parse_data->transactions = g_list_insert_before(parse_data->transactions, insertion_spot, trans_line);
01103       }
01104     }
01105 
01106     /* Increment to the next row. */
01107     if(redo_errors)
01108     {
01109       /* Move to the next error line in the list. */
01110       error_lines = g_list_next(error_lines);
01111       if(error_lines == NULL)
01112         i = parse_data->orig_lines->len; /* Don't continue the for loop. */
01113       else
01114         i = GPOINTER_TO_INT(error_lines->data);
01115     }
01116     else
01117     {
01118       i++;
01119     }
01120   }
01121 
01122   /* If we have a balance column, set the appropriate amounts on the transactions. */
01123   hasBalanceColumn = FALSE;
01124   for(i = 0; i < parse_data->column_types->len; i++)
01125   {
01126     if(parse_data->column_types->data[i] == GNC_CSV_BALANCE)
01127     {
01128       hasBalanceColumn = TRUE;
01129       break;
01130     }
01131   }
01132 
01133   if(hasBalanceColumn)
01134   {
01135     GList* transactions = parse_data->transactions;
01136 
01137     /* balance_offset is how much the balance currently in the account
01138      * differs from what it will be after the transactions are
01139      * imported. This will be sum of all the previous transactions for
01140      * any given transaction. */
01141     gnc_numeric balance_offset = double_to_gnc_numeric(0.0,
01142                                                        xaccAccountGetCommoditySCU(account),
01143                                                        GNC_RND_ROUND);
01144     while(transactions != NULL)
01145     {
01146       GncCsvTransLine* trans_line = (GncCsvTransLine*)transactions->data;
01147       if(trans_line->balance_set)
01148       {
01149         time_t date = xaccTransGetDate(trans_line->trans);
01150         /* Find what the balance should be by adding the offset to the actual balance. */
01151         gnc_numeric existing_balance = gnc_numeric_add(balance_offset,
01152                                                        xaccAccountGetBalanceAsOfDate(account, date),
01153                                                        xaccAccountGetCommoditySCU(account),
01154                                                        GNC_RND_ROUND);
01155 
01156         /* The amount of the transaction is the difference between the new and existing balance. */
01157         gnc_numeric amount = gnc_numeric_sub(trans_line->balance,
01158                                              existing_balance,
01159                                              xaccAccountGetCommoditySCU(account),
01160                                              GNC_RND_ROUND);
01161 
01162         SplitList* splits = xaccTransGetSplitList(trans_line->trans);
01163         while(splits)
01164         {
01165           SplitList* next_splits = g_list_next(splits);
01166           xaccSplitDestroy((Split*)splits->data);
01167           splits = next_splits;
01168         }
01169 
01170         trans_add_split(trans_line->trans, account, gnc_account_get_book(account), amount);
01171         
01172         /* This new transaction needs to be added to the balance offset. */
01173         balance_offset = gnc_numeric_add(balance_offset,
01174                                          amount,
01175                                          xaccAccountGetCommoditySCU(account),
01176                                          GNC_RND_ROUND);
01177       }
01178       transactions = g_list_next(transactions);
01179     }
01180   }
01181 
01182   if(redo_errors) /* Now that we're at the end, we do the freeing. */
01183   {
01184     g_list_free(begin_error_lines);
01185   }
01186 
01187   /* We need to resize parse_data->column_types since errors may have added columns. */
01188   for(i = 0; i < parse_data->orig_lines->len; i++)
01189   {
01190     if(max_cols < ((GPtrArray*)(parse_data->orig_lines->pdata[i]))->len)
01191       max_cols = ((GPtrArray*)(parse_data->orig_lines->pdata[i]))->len;
01192   }
01193   i = parse_data->column_types->len;
01194   parse_data->column_types = g_array_set_size(parse_data->column_types, max_cols);
01195   for(; i < max_cols; i++)
01196   {
01197     parse_data->column_types->data[i] = GNC_CSV_NONE;
01198   }
01199 
01200   return 0;
01201 }

Generated on Mon Sep 8 05:03:58 2008 for GnuCash by  doxygen 1.5.2