1*101e15b5SRichard Lowe /* 2*101e15b5SRichard Lowe * CDDL HEADER START 3*101e15b5SRichard Lowe * 4*101e15b5SRichard Lowe * The contents of this file are subject to the terms of the 5*101e15b5SRichard Lowe * Common Development and Distribution License, Version 1.0 only 6*101e15b5SRichard Lowe * (the "License"). You may not use this file except in compliance 7*101e15b5SRichard Lowe * with the License. 8*101e15b5SRichard Lowe * 9*101e15b5SRichard Lowe * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10*101e15b5SRichard Lowe * or http://www.opensolaris.org/os/licensing. 11*101e15b5SRichard Lowe * See the License for the specific language governing permissions 12*101e15b5SRichard Lowe * and limitations under the License. 13*101e15b5SRichard Lowe * 14*101e15b5SRichard Lowe * When distributing Covered Code, include this CDDL HEADER in each 15*101e15b5SRichard Lowe * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16*101e15b5SRichard Lowe * If applicable, add the following below this CDDL HEADER, with the 17*101e15b5SRichard Lowe * fields enclosed by brackets "[]" replaced with your own identifying 18*101e15b5SRichard Lowe * information: Portions Copyright [yyyy] [name of copyright owner] 19*101e15b5SRichard Lowe * 20*101e15b5SRichard Lowe * CDDL HEADER END 21*101e15b5SRichard Lowe */ 22*101e15b5SRichard Lowe /* 23*101e15b5SRichard Lowe * Copyright 1998-2003 Sun Microsystems, Inc. All rights reserved. 24*101e15b5SRichard Lowe * Use is subject to license terms. 25*101e15b5SRichard Lowe */ 26*101e15b5SRichard Lowe 27*101e15b5SRichard Lowe #ifndef _SORT_TYPES_H 28*101e15b5SRichard Lowe #define _SORT_TYPES_H 29*101e15b5SRichard Lowe 30*101e15b5SRichard Lowe #ifdef __cplusplus 31*101e15b5SRichard Lowe extern "C" { 32*101e15b5SRichard Lowe #endif 33*101e15b5SRichard Lowe 34*101e15b5SRichard Lowe #include <sys/resource.h> 35*101e15b5SRichard Lowe #include <sys/types.h> 36*101e15b5SRichard Lowe #include <limits.h> 37*101e15b5SRichard Lowe #include <stdio.h> 38*101e15b5SRichard Lowe #include <stdlib.h> 39*101e15b5SRichard Lowe 40*101e15b5SRichard Lowe typedef int flag_t; 41*101e15b5SRichard Lowe 42*101e15b5SRichard Lowe typedef int (*cmp_fcn_t)(void *, void *, flag_t); 43*101e15b5SRichard Lowe 44*101e15b5SRichard Lowe typedef union vchar { 45*101e15b5SRichard Lowe char sc; 46*101e15b5SRichard Lowe uchar_t usc; 47*101e15b5SRichard Lowe wchar_t wc; 48*101e15b5SRichard Lowe } vchar_t; 49*101e15b5SRichard Lowe 50*101e15b5SRichard Lowe typedef union vcharptr { 51*101e15b5SRichard Lowe char *sp; 52*101e15b5SRichard Lowe uchar_t *usp; 53*101e15b5SRichard Lowe wchar_t *wp; 54*101e15b5SRichard Lowe } vcharptr_t; 55*101e15b5SRichard Lowe 56*101e15b5SRichard Lowe typedef struct line_rec { 57*101e15b5SRichard Lowe vcharptr_t l_data; /* raw data */ 58*101e15b5SRichard Lowe vcharptr_t l_raw_collate; /* collatable raw data */ 59*101e15b5SRichard Lowe vcharptr_t l_collate; /* key-ordered collatable string */ 60*101e15b5SRichard Lowe ssize_t l_data_length; 61*101e15b5SRichard Lowe ssize_t l_collate_length; 62*101e15b5SRichard Lowe ssize_t l_collate_bufsize; 63*101e15b5SRichard Lowe } line_rec_t; 64*101e15b5SRichard Lowe 65*101e15b5SRichard Lowe enum field_species { 66*101e15b5SRichard Lowe ALPHA, 67*101e15b5SRichard Lowe MONTH, 68*101e15b5SRichard Lowe NUMERIC 69*101e15b5SRichard Lowe }; 70*101e15b5SRichard Lowe 71*101e15b5SRichard Lowe #define FIELD_DICTIONARY_ORDER 0x1 72*101e15b5SRichard Lowe #define FIELD_FOLD_UPPERCASE 0x2 73*101e15b5SRichard Lowe #define FIELD_IGNORE_NONPRINTABLES 0x4 74*101e15b5SRichard Lowe #define FIELD_IGNORE_BLANKS_START 0x8 75*101e15b5SRichard Lowe #define FIELD_IGNORE_BLANKS_END 0x10 76*101e15b5SRichard Lowe 77*101e15b5SRichard Lowe #define FIELD_REVERSE_COMPARISONS 0x20 78*101e15b5SRichard Lowe 79*101e15b5SRichard Lowe #define FIELD_MODIFIERS_DEFINED 0x40 80*101e15b5SRichard Lowe 81*101e15b5SRichard Lowe typedef struct field { 82*101e15b5SRichard Lowe struct field *f_next; 83*101e15b5SRichard Lowe 84*101e15b5SRichard Lowe /* 85*101e15b5SRichard Lowe * field ops vector 86*101e15b5SRichard Lowe */ 87*101e15b5SRichard Lowe ssize_t (*f_convert)(struct field *, line_rec_t *, 88*101e15b5SRichard Lowe vchar_t, ssize_t, ssize_t, ssize_t); 89*101e15b5SRichard Lowe enum field_species f_species; 90*101e15b5SRichard Lowe 91*101e15b5SRichard Lowe /* 92*101e15b5SRichard Lowe * starting and ending fields, and offsets 93*101e15b5SRichard Lowe */ 94*101e15b5SRichard Lowe int f_start_field; 95*101e15b5SRichard Lowe ssize_t f_start_offset; 96*101e15b5SRichard Lowe 97*101e15b5SRichard Lowe int f_end_field; 98*101e15b5SRichard Lowe ssize_t f_end_offset; 99*101e15b5SRichard Lowe 100*101e15b5SRichard Lowe flag_t f_options; 101*101e15b5SRichard Lowe } field_t; 102*101e15b5SRichard Lowe 103*101e15b5SRichard Lowe #define STREAM_SOURCE_MASK 0x000f 104*101e15b5SRichard Lowe #define STREAM_NO_SOURCE 0x0000 105*101e15b5SRichard Lowe #define STREAM_ARRAY 0x0001 106*101e15b5SRichard Lowe #define STREAM_MMAP 0x0002 107*101e15b5SRichard Lowe #define STREAM_SINGLE 0x0004 108*101e15b5SRichard Lowe #define STREAM_WIDE 0x0008 109*101e15b5SRichard Lowe 110*101e15b5SRichard Lowe #define STREAM_OPEN 0x0010 111*101e15b5SRichard Lowe #define STREAM_PRIMED 0x0020 112*101e15b5SRichard Lowe 113*101e15b5SRichard Lowe #define STREAM_OUTPUT 0x0040 114*101e15b5SRichard Lowe #define STREAM_EOS_REACHED 0x0080 115*101e15b5SRichard Lowe #define STREAM_NOTFILE 0x0100 116*101e15b5SRichard Lowe #define STREAM_UNIQUE 0x0200 117*101e15b5SRichard Lowe #define STREAM_INSTANT 0x0400 118*101e15b5SRichard Lowe #define STREAM_TEMPORARY 0x0800 119*101e15b5SRichard Lowe #define STREAM_NOT_FREEABLE 0x1000 120*101e15b5SRichard Lowe 121*101e15b5SRichard Lowe #define DEFAULT_INPUT_SIZE (1 * MEGABYTE) 122*101e15b5SRichard Lowe #define DEFAULT_RELEASE_SIZE (MEGABYTE / 2) 123*101e15b5SRichard Lowe 124*101e15b5SRichard Lowe #define CHAR_AVG_LINE 32 125*101e15b5SRichard Lowe #define WCHAR_AVG_LINE (sizeof (wchar_t) * CHAR_AVG_LINE) 126*101e15b5SRichard Lowe #define XFRM_MULTIPLIER 8 127*101e15b5SRichard Lowe 128*101e15b5SRichard Lowe #define NEXT_LINE_COMPLETE 0x0 129*101e15b5SRichard Lowe #define NEXT_LINE_INCOMPLETE 0x1 130*101e15b5SRichard Lowe 131*101e15b5SRichard Lowe #define PRIME_SUCCEEDED 0x0 132*101e15b5SRichard Lowe #define PRIME_FAILED_EMPTY_FILE 0x1 133*101e15b5SRichard Lowe #define PRIME_FAILED 0x2 134*101e15b5SRichard Lowe 135*101e15b5SRichard Lowe typedef struct stream_array { 136*101e15b5SRichard Lowe line_rec_t **s_array; 137*101e15b5SRichard Lowe ssize_t s_array_size; 138*101e15b5SRichard Lowe ssize_t s_cur_index; 139*101e15b5SRichard Lowe } stream_array_t; 140*101e15b5SRichard Lowe 141*101e15b5SRichard Lowe typedef struct stream_simple_file { 142*101e15b5SRichard Lowe /* 143*101e15b5SRichard Lowe * stream_simple_file_t is used for STREAM_MMAP and for STREAM_OUTPUT 144*101e15b5SRichard Lowe * for either single- (STREAM_SINGLE | STREAM_OUTPUT) or multi-byte 145*101e15b5SRichard Lowe * (STREAM_WIDE | STREAM_OUTPUT) locales. 146*101e15b5SRichard Lowe */ 147*101e15b5SRichard Lowe int s_fd; /* file descriptor */ 148*101e15b5SRichard Lowe caddr_t s_release_origin; /* start for next madvise(3C) */ 149*101e15b5SRichard Lowe } stream_simple_file_t; 150*101e15b5SRichard Lowe 151*101e15b5SRichard Lowe typedef struct stream_buffered_file { 152*101e15b5SRichard Lowe /* 153*101e15b5SRichard Lowe * stream_buffered_file_t is used for both STREAM_STDIO and 154*101e15b5SRichard Lowe * STREAM_WIDE. 155*101e15b5SRichard Lowe */ 156*101e15b5SRichard Lowe FILE *s_fp; /* file stream */ 157*101e15b5SRichard Lowe void *s_vbuf; /* stdio alternate buffer */ 158*101e15b5SRichard Lowe size_t s_bytes_used; 159*101e15b5SRichard Lowe } stream_buffered_file_t; 160*101e15b5SRichard Lowe 161*101e15b5SRichard Lowe typedef union stream_type { 162*101e15b5SRichard Lowe stream_array_t LA; /* array of line records */ 163*101e15b5SRichard Lowe stream_simple_file_t SF; /* file accessed via mmap */ 164*101e15b5SRichard Lowe stream_buffered_file_t BF; /* file accessed via stdio */ 165*101e15b5SRichard Lowe } stream_type_t; 166*101e15b5SRichard Lowe 167*101e15b5SRichard Lowe struct stream; 168*101e15b5SRichard Lowe 169*101e15b5SRichard Lowe typedef struct stream_ops { 170*101e15b5SRichard Lowe int (*sop_is_closable)(struct stream *); 171*101e15b5SRichard Lowe int (*sop_close)(struct stream *); 172*101e15b5SRichard Lowe int (*sop_eos)(struct stream *); 173*101e15b5SRichard Lowe ssize_t (*sop_fetch)(struct stream *); 174*101e15b5SRichard Lowe void (*sop_flush)(struct stream *); 175*101e15b5SRichard Lowe int (*sop_free)(struct stream *); 176*101e15b5SRichard Lowe int (*sop_open_for_write)(struct stream *); 177*101e15b5SRichard Lowe int (*sop_prime)(struct stream *); 178*101e15b5SRichard Lowe void (*sop_put_line)(struct stream *, line_rec_t *); 179*101e15b5SRichard Lowe void (*sop_release_line)(struct stream *); 180*101e15b5SRichard Lowe void (*sop_send_eol)(struct stream *); 181*101e15b5SRichard Lowe int (*sop_unlink)(struct stream *); 182*101e15b5SRichard Lowe } stream_ops_t; 183*101e15b5SRichard Lowe 184*101e15b5SRichard Lowe #define SOP_IS_CLOSABLE(s) ((s)->s_ops.sop_is_closable)(s) 185*101e15b5SRichard Lowe #define SOP_CLOSE(s) ((s)->s_ops.sop_close)(s) 186*101e15b5SRichard Lowe #define SOP_EOS(s) ((s)->s_ops.sop_eos)(s) 187*101e15b5SRichard Lowe #define SOP_FETCH(s) ((s)->s_ops.sop_fetch)(s) 188*101e15b5SRichard Lowe #define SOP_FLUSH(s) ((s)->s_ops.sop_flush)(s) 189*101e15b5SRichard Lowe #define SOP_FREE(s) ((s)->s_ops.sop_free)(s) 190*101e15b5SRichard Lowe #define SOP_OPEN_FOR_WRITE(s) ((s)->s_ops.sop_open_for_write)(s) 191*101e15b5SRichard Lowe #define SOP_PRIME(s) ((s)->s_ops.sop_prime)(s) 192*101e15b5SRichard Lowe #define SOP_PUT_LINE(s, l) ((s)->s_ops.sop_put_line)(s, l) 193*101e15b5SRichard Lowe #define SOP_RELEASE_LINE(s) ((s)->s_ops.sop_release_line)(s) 194*101e15b5SRichard Lowe #define SOP_SEND_EOL(s) ((s)->s_ops.sop_send_eol)(s) 195*101e15b5SRichard Lowe #define SOP_UNLINK(s) ((s)->s_ops.sop_unlink)(s) 196*101e15b5SRichard Lowe 197*101e15b5SRichard Lowe /* 198*101e15b5SRichard Lowe * The stream_t type is provided to simplify access to files, particularly for 199*101e15b5SRichard Lowe * external merges. 200*101e15b5SRichard Lowe */ 201*101e15b5SRichard Lowe typedef struct stream { 202*101e15b5SRichard Lowe struct stream *s_consumer; /* dependent on s_buffer */ 203*101e15b5SRichard Lowe struct stream *s_previous; 204*101e15b5SRichard Lowe struct stream *s_next; 205*101e15b5SRichard Lowe 206*101e15b5SRichard Lowe char *s_filename; 207*101e15b5SRichard Lowe 208*101e15b5SRichard Lowe line_rec_t s_current; /* present line buffers */ 209*101e15b5SRichard Lowe stream_ops_t s_ops; /* type-specific ops vector */ 210*101e15b5SRichard Lowe stream_type_t s_type; /* type-specific attributes */ 211*101e15b5SRichard Lowe 212*101e15b5SRichard Lowe void *s_buffer; 213*101e15b5SRichard Lowe size_t s_buffer_size; 214*101e15b5SRichard Lowe off_t s_filesize; 215*101e15b5SRichard Lowe size_t s_element_size; 216*101e15b5SRichard Lowe flag_t s_status; /* flags */ 217*101e15b5SRichard Lowe ino_t s_ino; 218*101e15b5SRichard Lowe dev_t s_dev; 219*101e15b5SRichard Lowe } stream_t; 220*101e15b5SRichard Lowe 221*101e15b5SRichard Lowe /* 222*101e15b5SRichard Lowe * sort(1) has, for debugging purposes, a primitive compile-time option to 223*101e15b5SRichard Lowe * generate statistics of various operations executed during an invocation. 224*101e15b5SRichard Lowe * These statistics are recorded in the following sort_statistics_t structure. 225*101e15b5SRichard Lowe */ 226*101e15b5SRichard Lowe typedef struct sort_statistics { 227*101e15b5SRichard Lowe u_longlong_t st_avail_mem; 228*101e15b5SRichard Lowe u_longlong_t st_convert_reallocs; 229*101e15b5SRichard Lowe u_longlong_t st_fetched_lines; 230*101e15b5SRichard Lowe u_longlong_t st_insert_full_down; 231*101e15b5SRichard Lowe u_longlong_t st_insert_full_input; 232*101e15b5SRichard Lowe u_longlong_t st_insert_full_up; 233*101e15b5SRichard Lowe u_longlong_t st_line_conversions; 234*101e15b5SRichard Lowe u_longlong_t st_not_unique_lines; 235*101e15b5SRichard Lowe u_longlong_t st_put_lines; 236*101e15b5SRichard Lowe u_longlong_t st_put_temp_lines_internal; 237*101e15b5SRichard Lowe u_longlong_t st_put_temp_lines_merge; 238*101e15b5SRichard Lowe u_longlong_t st_put_unique_lines; 239*101e15b5SRichard Lowe u_longlong_t st_shelved_lines; 240*101e15b5SRichard Lowe u_longlong_t st_subfiles; /* number of insertion sorts */ 241*101e15b5SRichard Lowe u_longlong_t st_swaps; 242*101e15b5SRichard Lowe u_longlong_t st_tqs_calls; 243*101e15b5SRichard Lowe 244*101e15b5SRichard Lowe uint_t st_input_files; 245*101e15b5SRichard Lowe uint_t st_merge_files; 246*101e15b5SRichard Lowe } sort_statistics_t; 247*101e15b5SRichard Lowe 248*101e15b5SRichard Lowe typedef struct sort { 249*101e15b5SRichard Lowe stream_t *m_input_streams; 250*101e15b5SRichard Lowe char *m_output_filename; 251*101e15b5SRichard Lowe 252*101e15b5SRichard Lowe stream_t *m_temporary_streams; 253*101e15b5SRichard Lowe char *m_tmpdir_template; 254*101e15b5SRichard Lowe 255*101e15b5SRichard Lowe field_t *m_fields_head; 256*101e15b5SRichard Lowe 257*101e15b5SRichard Lowe cmp_fcn_t m_compare_fn; 258*101e15b5SRichard Lowe ssize_t (*m_coll_convert)(field_t *, line_rec_t *, flag_t, 259*101e15b5SRichard Lowe vchar_t); 260*101e15b5SRichard Lowe 261*101e15b5SRichard Lowe sort_statistics_t *m_stats; 262*101e15b5SRichard Lowe size_t m_memory_limit; 263*101e15b5SRichard Lowe size_t m_memory_available; 264*101e15b5SRichard Lowe 265*101e15b5SRichard Lowe flag_t m_check_if_sorted_only; 266*101e15b5SRichard Lowe flag_t m_merge_only; 267*101e15b5SRichard Lowe flag_t m_unique_lines; 268*101e15b5SRichard Lowe flag_t m_entire_line; 269*101e15b5SRichard Lowe 270*101e15b5SRichard Lowe enum field_species m_default_species; 271*101e15b5SRichard Lowe flag_t m_field_options; 272*101e15b5SRichard Lowe vchar_t m_field_separator; 273*101e15b5SRichard Lowe 274*101e15b5SRichard Lowe flag_t m_c_locale; 275*101e15b5SRichard Lowe flag_t m_single_byte_locale; 276*101e15b5SRichard Lowe flag_t m_input_from_stdin; 277*101e15b5SRichard Lowe flag_t m_output_to_stdout; 278*101e15b5SRichard Lowe flag_t m_verbose; 279*101e15b5SRichard Lowe } sort_t; 280*101e15b5SRichard Lowe 281*101e15b5SRichard Lowe #ifdef __cplusplus 282*101e15b5SRichard Lowe } 283*101e15b5SRichard Lowe #endif 284*101e15b5SRichard Lowe 285*101e15b5SRichard Lowe #endif /* _SORT_TYPES_H */ 286