Login Register Actian.com  

Actian Community Wiki

Navigation
Learn About
Developing With
Ingres Talk
Information
Toolbox

OME unicode64()

From Ingres Community Wiki

Revision as of 23:39, 8 February 2010; Teresa (Talk | contribs)
(diff) ←Older revision | Current revision | Newer revision→ (diff)
Jump to: navigation, search

Contents

Introduction

The unicode64() function will (much like OME_base64()) convert an input string of nvarchar or varbyte characters into a sequence drawn from the 64 element set 'A'-'Z', 'a'-'z', '0'-'9', '+' and '/'.

The difference between the two functions lies in how the input stream of bytes is split. The unicode64() function splits each successive byte pair (one 16-bit code point) into three words of 4, 6 and 6 bits, taken from the most significant bits downwards. Each word is then used as an offset into the 64 element set, so every code point yields three output characters.

Syntax

unicode64(
    (nvarchar ) string
    )

Return Value

A suitably sized varchar.

Examples

Problems

  • I suspect the code will work on Linux boxes but will need alteration to handle platforms which are not big-endian.
  • The code could be extended to include long types, although it does seem like overkill!
  • Older installations of OME may not support nvarchar types. You can try installing the following in your code to see if it helps.
/*
** Fallback datatype identifiers for the Unicode character types
** (26 for nchar, 27 for nvarchar).  Each value is supplied here only
** when no earlier definition, for example from iiadd.h, is in effect.
*/
#if !defined(II_NCHAR)
#  define II_NCHAR 26
#endif

#if !defined(II_NVARCHAR)
#  define II_NVARCHAR 27
#endif

See Also

FOD

Add the following definition to the fod_id enum set: UDF_UNICODE64

Then add the following definition to the Function_Definitions array:

/*
** Operation definition entry for unicode64.
** The "..." line is a placeholder for the entries that already exist in
** the array; it is not C syntax.
** NOTE(review): the excerpt ends without a terminating ';' - confirm
** against the real array when merging.
*/
static IIADD_FO_DFN Function_Definitions[]={
    ...
   {
   II_O_OPERATION,   /*fod_object_type*/
   {"unicode64"},    /*fod_name*/
   UDF_UNICODE64,    /*fod_id: the value the function instance repeats as fid_opid*/
   II_NORMAL         /*fod_type*/
   },
}

FIDs

Add the following definitions to the fid_id enum set:

UDF_FI_UNICODE64_NVARCHAR

These data type arrays are assumed by the FIDs.

/*
** Argument datatype list passed as fid_args by the unicode64 function
** instance: two II_NVARCHAR entries.
*/
static II_DT_ID UD_2_NVARCHAR[] = {
    II_NVARCHAR,
    II_NVARCHAR
};

The FIDs are:

/*
** Function instance entry binding unicode64(nvarchar) to its executor
** (unicode64) and its lenspec routine (unicode64_ls).
** NOTE(review): the excerpt ends without a terminating ';' - confirm
** against the real array when merging.
*/
static IIADD_FI_DFN Function_Instances[] = {
   {
   II_O_FUNCTION_INSTANCE,    /* fid_object_type */
   UDF_FI_UNICODE64_NVARCHAR, /* fid_id*/
   II_NO_FI,                  /* fid_cmplmnt*/
   UDF_UNICODE64,             /* fid_opid=fod_id from function definition
                              ** This is the minor sort field for this array
                              */
   II_NORMAL,                 /* fid_optype
                              ** This is the major sort field for this array
                              */
   II_FID_F0_NOFLAGS,         /* fid_attributes*/
   0,                         /* fid_wslength*/
   1,                         /* fid_numargs: one argument, although
                              ** UD_2_NVARCHAR holds two entries
                              */
   UD_2_NVARCHAR,             /* fid_args, a pointer to an array of datatypes*/
   II_VARCHAR,                /* fid_result, result is a varchar*/
   II_RES_EXTERN,             /* fid_rltype
                              ** NOTE(review): presumably means the result
                              ** length comes from the lenspec routine - confirm
                              */
   II_LEN_UNKNOWN,            /* fid_rlength */
   0,                         /* fid_rprec */
   unicode64,                 /* fid_routine */
   unicode64_ls               /* lenspec_routine */
   }, /*unicode64(nvarchar)*/
}

Executor Code

/*
** unicode64() - executor routine for the unicode64 function.
**
** Encodes every 16-bit code point of an nchar/nvarchar input as three
** characters drawn from the 64 element set 'A'-'Z', 'a'-'z', '0'-'9',
** '+' and '/'.  In output order the three characters come from the top
** 4 bits, the middle 6 bits and the low 6 bits of the code point.  The
** result is stored as a varchar: a short character count followed by
** the encoded characters.
**
** Parameters:
**     scb  - session control block; only handed on to us_error()
**     p1   - value to convert; abs(db_datatype) must be II_NCHAR or
**            II_NVARCHAR
**     rdv  - result value; db_data must provide db_length bytes
**
** Returns:
**     II_OK on success.  II_ERROR, after reporting through us_error(),
**     when the input type is unexpected, the input count does not fit
**     in the input buffer, or the result does not fit in rdv.
*/
II_STATUS
unicode64 (
   II_SCB          *scb,
   II_DATA_VALUE   *p1, /* string to convert  */
   II_DATA_VALUE   *rdv /* return_text  */
   )
{
   /* base64Lookup[]:
   **     A lookup array to encode a single 6 bit value to Base 64.
   **     Constant, so it is built once rather than on every call.
   */
   static const unsigned char base64Lookup[64] = {
   /* 00 - 09 */   'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
   /* 10 - 19 */   'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
   /* 20 - 29 */   'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd',
   /* 30 - 39 */   'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
   /* 40 - 49 */   'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
   /* 50 - 59 */   'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7',
   /* 60 - 63 */   '8', '9', '+', '/'
   };

   const unsigned char *src;   /* first input code point */
   unsigned char  *dst;        /* first output character */
   unsigned short  true_length; /* number of code points to convert */
   unsigned short  offset;      /* bytes preceding the character data */
   unsigned short  code_point;
   short           out_count;
   int             i, inp_length;
   char            msg[256];

   rdv->db_prec     = 0;

   /* Check the input parameter type is OK
   ** This will not be necessary once the unicode support is added!
   */
   switch (abs(p1->db_datatype)) {
       /*
       ** Use 26 for nchar and 27 for nvarchar.  An nvarchar is a struct
       ** with an i2 count followed by ucs2 (16-bit) code points.  The count
       ** is in array entries, not bytes.
       */
       case II_NCHAR:
           /* db_length counts bytes, two per code point.  The integer
           ** division also discards an odd trailing byte.
           */
           true_length = (unsigned short)(p1->db_length / 2);
           offset = 0;
           break;

       case II_NVARCHAR:
           /* Copy the count out instead of dereferencing a cast pointer,
           ** so an unaligned db_data cannot fault.
           */
           memcpy(&true_length, p1->db_data, sizeof(true_length));
           offset = sizeof(short);
           break;

       default: /* This just shouldn't happen! */
           sprintf(msg,
               "unicode64(): Unexpected parameter type: %d",
                p1->db_datatype);
           us_error(scb, 0x200010, msg);
           return(II_ERROR);
       }

   /* inp_length:
   **     The number of bytes of storage reserved for the data
   */
   inp_length = (int )p1->db_length - offset;

   /* Never read beyond the storage the caller handed over */
   if ((int )true_length * 2 > inp_length) {
       sprintf(msg,
           "unicode64(): Input length field exceeds the input buffer");
       us_error(scb, 0x200010, msg);
       return(II_ERROR);
       }

   /* Three output characters per code point plus the count field must
   ** fit.  Checking once up front means nothing is written on failure.
   */
   if ((long )sizeof(short) + (long )true_length * 3 > (long )rdv->db_length) {
       sprintf(msg,
           "unicode64(): Returned data will be too long for internal buffer");
       us_error(scb, 0x200010, msg);
       return(II_ERROR);
       }

   src = (const unsigned char *)p1->db_data + offset;
   dst = (unsigned char *)rdv->db_data + sizeof(short);

   for (i=0; i < true_length; i++) {
       /* Load the two bytes into a 16-bit holder.  Copying them into a
       ** wider type would misplace them on big-endian hosts.
       */
       code_point = 0;
       memcpy(&code_point, src + i*2, 2);

       /* First character is drawn from the top 4 bits */
       dst[i*3]     = base64Lookup[(code_point >> 12) & 0x000F];

       /* Second character is drawn from the middle 6 bits */
       dst[i*3 + 1] = base64Lookup[(code_point >> 6) & 0x003F];

       /* Third character is drawn from the lower 6 bits */
       dst[i*3 + 2] = base64Lookup[code_point & 0x003F];
       }

   /* To get here means things are cool!
   ** Fix the varchar length field in the db_data structure and return.
   */
   out_count = (short )(true_length * 3);
   memcpy(rdv->db_data, &out_count, sizeof(out_count));

   return(II_OK);
} /*unicode64*/


Lenspec Routine

This code assumes the definition:

/* Largest result db_length that unicode64_ls accepts; a larger computed
** length makes it report an error and return II_ERROR.
*/
#define MAX_VARCHAR_LENGTH 32000

The lenspec routine is:

/*
** unicode64_ls() - lenspec routine for unicode64.
**
** Sets rdv->db_length to the storage the result needs: three output
** characters per input unit plus the short count field.
**
** Parameters:
**     scb  - session control block; only handed on to us_error()
**     opid - not used
**     p1   - description of the value to convert; only db_datatype and
**            db_length are read
**     p2   - not used
**     rdv  - result description; db_length is filled in
**
** Returns:
**     II_OK on success.  II_ERROR, after tracing and reporting the
**     problem, when the input type is unexpected or the result length
**     would exceed MAX_VARCHAR_LENGTH.
*/
II_STATUS
unicode64_ls (
   II_SCB          *scb,
   II_DT_ID        *opid,
   II_DATA_VALUE   *p1, /* string to convert  */
   II_DATA_VALUE   *p2, /* nothing */
   II_DATA_VALUE   *rdv /* return_text  */
   )
{
   char msg[256];
   long data_bytes;    /* input storage, excluding any null byte */
   long true_length;   /* input units that each yield 3 characters */
   long result_length;

   /* long arithmetic throughout, so the multiplications cannot wrap the
   ** way unsigned short arithmetic could
   */
   data_bytes = (long )p1->db_length;
   if (p1->db_datatype<0) data_bytes--; /* Subtract 1byte for null */

   switch (abs(p1->db_datatype)) {
       case II_VARCHAR:
           true_length = data_bytes;
           break;

       case II_NVARCHAR:
           /* db_length is in bytes and includes the short count field.
           ** What remains holds two bytes per code point.
           */
           true_length = (data_bytes - (long )sizeof(short)) / 2;
           if (true_length < 0) true_length = 0;
           break;

       default: /* This just shouldn't happen! */
           sprintf(msg,"unicode64(): Unexpected parameter type: %d", p1->db_datatype);
           Ingres_trace_function(II_TRACE_FE_MASK, strlen(msg), msg);
           us_error(scb, 0x22022, msg); /* E_AD2005 (msg is ignored) */
           return(II_ERROR);
       }

   result_length = true_length*3 + (long )sizeof(short);
   rdv->db_length=(int )result_length;
   if (result_length > MAX_VARCHAR_LENGTH)
   {
       sprintf(msg,"unicode64(): data is too long for conversion");
       Ingres_trace_function(II_TRACE_FE_MASK, strlen(msg), msg);
       us_error(scb, 0x22022, msg); /* E_AD2005 (msg is ignored) */
       return(II_ERROR);
   }
   return (II_OK);
} /*unicode64_ls */
Personal tools
© 2011 Actian Corporation. All Rights Reserved