netcdf-c/udunits/lib/unitToIdMap.c
2010-06-03 13:24:43 +00:00

816 lines
19 KiB
C

/*
* Copyright 2008, 2009 University Corporation for Atmospheric Research
*
* This file is part of the UDUNITS-2 package. See the file LICENSE
* in the top-level source-directory of the package for copying and
* redistribution conditions.
*/
/*
* Unit-to-identifier map.
*/
/*LINTLIBRARY*/
#ifndef _XOPEN_SOURCE
# define _XOPEN_SOURCE 500
#endif
#include <assert.h>
#include <errno.h>
#include <search.h>
#include <stdlib.h>
#include <string.h>
#include "udunits2.h"
#include "unitAndId.h"
#include "unitToIdMap.h" /* this module's API */
#include "systemMap.h"
typedef struct {
void* ascii;
void* latin1;
void* utf8;
} UnitToIdMap;
static SystemMap* systemToUnitToName = NULL;
static SystemMap* systemToUnitToSymbol = NULL;
/******************************************************************************
* Miscellaneous Functions:
******************************************************************************/
/*
* Unconditionally converts an ISO Latin-1 string into a UTF-8 string.
*
* Arguments:
* latin1String Pointer to the ISO Latin-1 string.
* Returns:
* NULL Failure. See errno.
* else Pointer to the equivalent UTF-8 string. Should be freed
* when no longer needed.
*/
static char*
latin1ToUtf8(
const char* const latin1String)
{
int nchar;
const char* inp;
char* outp;
char* utf8String;
for (nchar = 0, inp = latin1String; *inp; ++inp, ++nchar)
if ((*inp & 0x80U) != 0)
nchar++;
utf8String = malloc(nchar+1);
if (utf8String != NULL) {
for (inp = latin1String, outp = utf8String; *inp; ++inp, ++outp) {
if ((*inp & 0x80U) == 0) {
*outp = *inp;
}
else {
*outp++ = (char)(0xC0U | ((unsigned)*inp >> 6));
*outp = (char)(0x80U | (*inp & 0x3FU));
}
}
*outp = 0;
}
return utf8String;
}
/*
* Adjust a given encoding according to a string. Because ASCII is a subset of
* ISO Latin-1 and because a UTF-8 encoded string must follow certain rules,
* it's possible for strings to be mis-encoded and for an encoding specification
* to be too restrictive or over-generous. If the encoding specification is
* ASCII and the given string contains a character with the high-order bit
* set, then the encoding will be changed to ISO Latin-1. If the encoding
* specification is ISO Latin-1 but the string doesn't contain a character with
* the high-order bit set, then the encoding will be changed to ASCII. If the
* encoding specification is UTF-8 but the string doesn't follow the UTF-8
* encoding rules, then the function will error-return.
*
* Arguments:
* encoding Pointer to the presumptive encoding. Might be modified
* on return to reflect the actual, most restrictive,
* encoding of "string".
* string The string to be checked.
* Returns:
* 0 Success. "*encoding" might be modified.
* -1 Failure. "string" doesn't conform to "encoding".
*/
static int
adjustEncoding(
ut_encoding* const encoding,
const char* string)
{
int status = 0; /* success */
if (*encoding == UT_ASCII) {
for (; *string && ((*string & 0x80U) == 0); string++)
;
if (*string != 0)
*encoding = UT_LATIN1;
}
else if (*encoding == UT_LATIN1) {
for (; *string && ((*string & 0x80U) == 0); string++)
;
if (*string == 0)
*encoding = UT_ASCII;
}
else if (*encoding == UT_UTF8) {
for (; *string; string++) {
if (*string & 0x80U) {
if ((*string & 0xE0U) == 0xC0U) {
if ((*++string & 0xC0U) != 0x80U)
break;
}
else if ((*string & 0xF0U) == 0xE0U) {
if ((*++string & 0xC0U) != 0x80U)
break;
if ((*++string & 0xC0U) != 0x80U)
break;
}
else if ((*string & 0xF8U) == 0xF0U) {
if ((*++string & 0xC0U) != 0x80U)
break;
if ((*++string & 0xC0U) != 0x80U)
break;
if ((*++string & 0xC0U) != 0x80U)
break;
}
}
}
if (*string)
status = -1;
}
return status;
}
/******************************************************************************
* Internal Map Functions:
******************************************************************************/
static int
compareUnits(
const void* const entry1,
const void* const entry2)
{
return ut_compare(((const UnitAndId*)entry1)->unit,
((const UnitAndId*)entry2)->unit);
}
/*
* Selects a unit-and-identifier tree corresponding to a given encoding.
*
* Arguments:
* map The unit-to-id map.
* encoding The encoding.
* Returns:
* Pointer to the root of the unit-and-identifier tree in "map" that
* corresponds to "encoding".
*/
static void**
selectTree(
UnitToIdMap* const unitToIdMap,
const ut_encoding encoding)
{
return
encoding == UT_ASCII
? &unitToIdMap->ascii
: encoding == UT_LATIN1
? &unitToIdMap->latin1
: &unitToIdMap->utf8;
}
/*
* Returns a new instance of a unit-to-identifier map.
*
* Returns:
* NULL Failure. See "errno".
* else Pointer to a new unit-to-identifier map.
*/
static UnitToIdMap*
utimNew(void)
{
UnitToIdMap* const map = malloc(sizeof(UnitToIdMap));
if (map != NULL) {
map->ascii = NULL;
map->latin1 = NULL;
map->utf8 = NULL;
}
return map;
}
/*
* Frees a unit-to-identifier map. All entries in all encodings are freed.
*
* Arguments:
* map Pointer to the map to be freed.
*/
static void
utimFree(
UnitToIdMap* map)
{
if (map != NULL) {
ut_encoding encodings[] = {UT_ASCII, UT_LATIN1, UT_UTF8};
int i;
for (i = 0; i < sizeof(encodings)/sizeof(encodings[0]); ++i) {
void** rootp = selectTree(map, encodings[i]);
while (*rootp != NULL) {
UnitAndId* uai = **(UnitAndId***)rootp;
(void)tdelete(uai, rootp, compareUnits);
uaiFree(uai);
}
}
free(map);
}
}
/*
* Adds an entry to a unit-to-identifier map.
*
* Arguments:
* map Pointer to unit-to-identifier map.
* unit The unit. May be freed upon return.
* id The identifier. May be freed upon return.
* encoding The ostensible encoding of "id".
* Returns:
* UT_BAD_ARG "id" is inconsistent with "encoding".
* UT_OS Operating-system error. See "errno".
* UT_EXISTS "unit" already maps to a different identifier.
* UT_SUCCESS Success.
*/
static ut_status
utimAdd(
UnitToIdMap* const map,
const ut_unit* unit,
const char* const id,
ut_encoding encoding)
{
ut_status status;
assert(map != NULL);
assert(unit != NULL);
assert(id != NULL);
if (adjustEncoding(&encoding, id)) {
status = UT_BAD_ARG;
ut_set_status(status);
ut_handle_error_message("Identifier not in given encoding");
}
else {
UnitAndId* targetEntry = uaiNew(unit, id);
if (targetEntry != NULL) {
void** rootp = selectTree(map, encoding);
UnitAndId** treeEntry = tsearch(targetEntry, rootp, compareUnits);
if (treeEntry == NULL) {
status = UT_OS;
ut_set_status(status);
ut_handle_error_message(strerror(errno));
ut_handle_error_message("Couldn't add search-tree entry");
uaiFree(targetEntry);
}
else {
if (strcmp((*treeEntry)->id, id) != 0) {
status = UT_EXISTS;
ut_set_status(status);
ut_handle_error_message("Unit already maps to \"%s\"",
(*treeEntry)->id);
}
else {
status = UT_SUCCESS;
}
if (targetEntry != *treeEntry)
uaiFree(targetEntry);
}
} /* "targetEntry" allocated */
} /* valid arguments */
return status;
}
/*
* Removes an entry from a unit-to-identifier map.
*
* Arguments:
* map Pointer to the unit-to-identifier map.
* unit The unit. May be freed upon return.
* encoding The encoding to be removed.
* Returns:
* UT_SUCCESS Success.
*/
static ut_status
utimRemove(
UnitToIdMap* const map,
const ut_unit* unit,
ut_encoding encoding)
{
ut_status status;
UnitAndId targetEntry;
UnitAndId** treeEntry;
assert(map != NULL);
assert(unit != NULL);
targetEntry.unit = (ut_unit*)unit;
treeEntry = tfind(&targetEntry, selectTree(map, encoding), compareUnits);
if (treeEntry == NULL || *treeEntry == NULL) {
status = UT_SUCCESS;
}
else {
UnitAndId* uai = *treeEntry;
(void)tdelete(uai, selectTree(map, encoding), compareUnits);
uaiFree(uai);
}
return status;
}
/*
* Returns the unit-and-identifier whose ASCII identifier corresponding to a
* unit.
*
* Arguments:
* map The unit-to-identifier map.
* unit The unit to be used as the key in the search.
* Returns:
* NULL The map doesn't contain an entry corresponding to "unit" whose
* identifier is in ASCII.
* else Pointer to the entry corresponding to "unit" whose identifier is
* in ASCII.
*/
static UnitAndId*
utimFindAsciiByUnit(
UnitToIdMap* const map,
const ut_unit* const unit)
{
UnitAndId targetEntry;
UnitAndId** treeEntry;
targetEntry.unit = (ut_unit*)unit;
treeEntry = tfind(&targetEntry, &map->ascii, compareUnits);
return treeEntry == NULL ? NULL : *treeEntry;
}
/*
* Finds a unit-search-entry with a Latin-1 identifier correcponding to a unit.
*
* Arguments:
* map The unit-to-identifier map.
* unit The unit to be used as the key in the search.
* Returns:
* NULL The map doesn't contain an entry corresponding to "unit" whose
* identifier is in Latin-1.
* else Pointer to the entry corresponding to "unit" whose identifier is
* in Latin-1 (and might, actually, be in ASCII).
*/
static UnitAndId*
utimFindLatin1ByUnit(
UnitToIdMap* const map,
const ut_unit* const unit)
{
UnitAndId targetEntry;
UnitAndId** treeEntry;
targetEntry.unit = (ut_unit*)unit;
treeEntry = tfind(&targetEntry, &map->latin1, compareUnits);
if (treeEntry == NULL)
treeEntry = tfind(&targetEntry, &map->ascii, compareUnits);
return treeEntry == NULL ? NULL : *treeEntry;
}
/*
* Finds an entry with a UTF-8 identifier corresponding to a unit.
*
* Arguments:
* map The unit-to-identifier map.
* unit The unit to be used as the key in the search.
* Returns:
* NULL The map doesn't contain an entry corresponding to "unit" whose
* identifier is in UTF-8.
* else Pointer to the entry corresponding to "unit" whose identifier is
* in UTF-8 (and might, actually, be in ASCII).
*/
static UnitAndId*
utimFindUtf8ByUnit(
UnitToIdMap* const map,
const ut_unit* const unit)
{
UnitAndId targetEntry;
UnitAndId** treeEntry = NULL; /* failure */
targetEntry.unit = (ut_unit*)unit;
treeEntry = tfind(&targetEntry, &map->utf8, compareUnits);
if (treeEntry == NULL) {
treeEntry = tfind(&targetEntry, &map->latin1, compareUnits);
if (treeEntry == NULL) {
treeEntry = tfind(&targetEntry, &map->ascii, compareUnits);
}
else {
/*
* Create the UTF-8 version of the Latin-1 identifier and add it to
* the UTF-8 unit-to-id map so that it will be found next time.
*/
char* const id = latin1ToUtf8((*treeEntry)->id);
if (id == NULL) {
ut_set_status(UT_OS);
ut_handle_error_message(strerror(errno));
ut_handle_error_message(
"Couldn't convert identifier from ISO-8859-1 to UTF-8");
treeEntry = NULL;
}
else {
UnitAndId* newEntry = uaiNew(unit, id);
if (newEntry != NULL) {
treeEntry = tsearch(newEntry, &map->utf8, compareUnits);
if (treeEntry == NULL) {
uaiFree(newEntry);
ut_set_status(UT_OS);
ut_handle_error_message(strerror(errno));
ut_handle_error_message(
"Couldn't add unit-and-identifier to search-tree");
}
}
free(id);
} /* UTF-8 identifier created */
} /* found Latin-1 identifier */
} /* no UTF-8 identifier */
return treeEntry == NULL ? NULL : *treeEntry;
}
/*
* Adds an entry to the unit-to-identifier map associated with a unit-system.
*
* Arguments:
* sytemMap Address of the pointer to the
* system-to-unit-to-identifier map.
* unit The unit. May be freed upon return.
* id The identifier. May be freed upon return.
* encoding The ostensible encoding of "id".
* Returns:
* UT_BAD_ARG "unit" or "id" is NULL, or "id" is inconsistent with
* "encoding".
* UT_OS Operating-system error. See "errno".
* UT_EXISTS "unit" already maps to a different identifier.
* UT_SUCCESS Success.
*/
static ut_status
mapUnitToId(
SystemMap** const systemMap,
const ut_unit* const unit,
const char* const id,
ut_encoding encoding)
{
ut_status status;
assert(systemMap != NULL);
if (unit == NULL || id == NULL) {
status = UT_BAD_ARG;
}
else {
if (*systemMap == NULL) {
*systemMap = smNew();
if (*systemMap == NULL)
status = UT_OS;
}
if (*systemMap != NULL) {
UnitToIdMap** const unitToIdMap =
(UnitToIdMap**)smSearch(*systemMap, ut_get_system(unit));
if (unitToIdMap == NULL) {
status = UT_OS;
}
else {
if (*unitToIdMap == NULL) {
*unitToIdMap = utimNew();
if (*unitToIdMap == NULL)
status = UT_OS;
}
if (*unitToIdMap != NULL)
status = utimAdd(*unitToIdMap, unit, id, encoding);
}
}
}
return status;
}
/*
* Removes an entry from the unit-to-identifier map associated with a
* unit-system.
*
* Arguments:
* sytemMap Pointer to the system-to-unit-to-identifier map.
* unit The unit. May be freed upon return.
* encoding The ostensible encoding of "id".
* Returns:
* UT_BAD_ARG "systemMap" is NULL.
* UT_BAD_ARG "unit" is NULL.
* UT_SUCCESS Success.
*/
static ut_status
unmapUnitToId(
SystemMap* const systemMap,
const ut_unit* const unit,
ut_encoding encoding)
{
ut_status status;
if (systemMap == NULL || unit == NULL) {
status = UT_BAD_ARG;
}
else {
UnitToIdMap** const unitToIdMap =
(UnitToIdMap**)smFind(systemMap, ut_get_system(unit));
status =
(unitToIdMap == NULL || *unitToIdMap == NULL)
? UT_SUCCESS
: utimRemove(*unitToIdMap, unit, encoding);
}
return status;
}
/*
* Returns the identifier in a given encoding to which a unit associated with
* a unit-system maps.
*
* Arguments:
* systemMap Pointer to the system-to-unit-to-id map.
* unit Pointer to the unit whose identifier should be returned.
* encoding The desired encoding of the identifier.
* Returns:
* NULL Failure. "ut_get_status()" will be
* UT_BAD_ARG "unit" was NULL.
* else Pointer to the identifier in the given encoding
* associated with "unit".
*/
static const char*
getId(
SystemMap* const systemMap,
const ut_unit* const unit,
const ut_encoding encoding)
{
const char* id = NULL; /* failure */
if (unit == NULL) {
ut_set_status(UT_BAD_ARG);
ut_handle_error_message("NULL unit argument");
}
else {
UnitToIdMap** const unitToId =
(UnitToIdMap**)smFind(systemMap, ut_get_system(unit));
if (unitToId != NULL) {
UnitAndId* mapEntry =
encoding == UT_LATIN1
? utimFindLatin1ByUnit(*unitToId, unit)
: encoding == UT_UTF8
? utimFindUtf8ByUnit(*unitToId, unit)
: utimFindAsciiByUnit(*unitToId, unit);
if (mapEntry != NULL)
id = mapEntry->id;
}
}
return id;
}
/******************************************************************************
* Public API:
******************************************************************************/
/*
* Adds a mapping from a unit to a name.
*
* Arguments:
* unit Pointer to the unit to be mapped to "name". May be
* freed upon return.
* name Pointer to the name to be mapped-to by "unit". May be
* freed upon return.
* encoding The encoding of "name".
* Returns:
* UT_SUCCESS Success.
* UT_BAD_ARG "unit" or "name" is NULL, or "name" is not in the
* specified encoding.
* UT_OS Operating-system error. See "errno".
* UT_EXISTS "unit" already maps to a name.
*/
ut_status
ut_map_unit_to_name(
const ut_unit* const unit,
const char* const name,
ut_encoding encoding)
{
ut_set_status(mapUnitToId(&systemToUnitToName, unit, name, encoding));
return ut_get_status();
}
/*
* Removes a mapping from a unit to a name.
*
* Arguments:
* unit Pointer to the unit. May be freed upon return.
* encoding The encoding to be removed. No other encodings will be
* removed.
* Returns:
* UT_BAD_ARG "unit" is NULL.
* UT_SUCCESS Success.
*/
ut_status
ut_unmap_unit_to_name(
const ut_unit* const unit,
ut_encoding encoding)
{
ut_set_status(unmapUnitToId(systemToUnitToName, unit, encoding));
return ut_get_status();
}
/*
* Adds a mapping from a unit to a symbol.
*
* Arguments:
* unit Pointer to the unit to be mapped to "symbol". May be
* freed upon return.
* symbol Pointer to the symbol to be mapped-to by "unit". May
* be freed upon return.
* encoding The encoding of "symbol".
* Returns:
* UT_SUCCESS Success.
* UT_BAD_ARG "unit" or "symbol" is NULL.
* UT_OS Operating-system error. See "errno".
* UT_EXISTS "unit" already maps to a symbol.
*/
ut_status
ut_map_unit_to_symbol(
const ut_unit* unit,
const char* const symbol,
ut_encoding encoding)
{
ut_set_status(mapUnitToId(&systemToUnitToSymbol, unit, symbol, encoding));
return ut_get_status();
}
/*
* Removes a mapping from a unit to a symbol.
*
* Arguments:
* unit Pointer to the unit to be unmapped to a symbol. May be
* freed upon return.
* encoding The encoding to be removed. The mappings for "unit" in
* other encodings will not be removed.
* Returns:
* UT_SUCCESS Success.
* UT_BAD_ARG "unit" is NULL.
*/
ut_status
ut_unmap_unit_to_symbol(
const ut_unit* const unit,
ut_encoding encoding)
{
ut_set_status(unmapUnitToId(systemToUnitToSymbol, unit, encoding));
return ut_get_status();
}
/*
* Returns the name in a given encoding to which a unit maps.
*
* Arguments:
* unit Pointer to the unit whose name should be returned.
* encoding The desired encoding of the name.
* Returns:
* NULL Failure. "ut_get_status()" will be
* UT_BAD_ARG "unit" is NULL.
* UT_SUCCESS "unit" doesn't map to a name in
* in the given encoding.
* else Pointer to the name in the given encoding to which
* "unit" maps.
*/
const char*
ut_get_name(
const ut_unit* const unit,
const ut_encoding encoding)
{
ut_set_status(UT_SUCCESS);
return getId(systemToUnitToName, unit, encoding);
}
/*
* Returns the symbol in a given encoding to which a unit maps.
*
* Arguments:
* unit Pointer to the unit whose symbol should be returned.
* encoding The desired encoding of the symbol.
* Returns:
* NULL Failure. "ut_get_status()" will be
* UT_BAD_ARG "unit" is NULL.
* UT_SUCCESS "unit" doesn't map to a symbol
* in the given encoding.
* else Pointer to the symbol in the given encoding to which
* "unit" maps.
*/
const char*
ut_get_symbol(
const ut_unit* const unit,
const ut_encoding encoding)
{
ut_set_status(UT_SUCCESS);
return getId(systemToUnitToSymbol, unit, encoding);
}
/*
* Frees resources associated with a unit-system.
*
* Arguments:
* system Pointer to the unit-system to have its associated
* resources freed.
*/
void
utimFreeSystem(
ut_system* system)
{
if (system != NULL) {
SystemMap* systemMaps[2];
int i;
systemMaps[0] = systemToUnitToName;
systemMaps[1] = systemToUnitToSymbol;
for (i = 0; i < 2; i++) {
if (systemMaps[i] != NULL) {
UnitToIdMap** const unitToId =
(UnitToIdMap**)smFind(systemMaps[i], system);
if (unitToId != NULL)
utimFree(*unitToId);
smRemove(systemMaps[i], system);
}
}
}
}