/*===========================================================================
*
*                            PUBLIC DOMAIN NOTICE
*               National Center for Biotechnology Information
*
*  This software/database is a "United States Government Work" under the
*  terms of the United States Copyright Act.  It was written as part of
*  the author's official duties as a United States Government employee and
*  thus cannot be copyrighted.  This software/database is freely available
*  to the public for use. The National Library of Medicine and the U.S.
*  Government have not placed any restriction on its use or reproduction.
*
*  Although all reasonable efforts have been taken to ensure the accuracy
*  and reliability of the software and data, the NLM and the U.S.
*  Government do not and cannot warrant the performance or results that
*  may be obtained by using this software or data. The NLM and the U.S.
*  Government disclaim all warranties, express or implied, including
*  warranties of performance, merchantability or fitness for any particular
*  purpose.
*
*  Please cite the author in any work or product based on this material.
*
* ===========================================================================
*
*/
#include <klib/container.h>
#include <klib/log.h>
#include <klib/out.h>
#include <klib/status.h>
#include <klib/rc.h>
#include <kfs/file.h>
#include <kfs/buffile.h>
#include <kfs/gzip.h>
#include <kfs/bzip.h>
#include <kdb/manager.h>
#include <kdb/meta.h>
#include <kdb/namelist.h>
#include <kapp/main.h>
#include <kapp/args.h>
#include <insdc/insdc.h>
#include <insdc/sra.h>
#include <vdb/manager.h>
#include <vdb/database.h>
#include <vdb/table.h>
#include <vdb/cursor.h>
#include <vdb/vdb-priv.h>
#include <vdb/schema.h>
#include <sra/sraschema.h>

#include <kfs/directory.h>
#include <os-native.h>
#include <sysalloc.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <assert.h>

#include "debug.h"
#include "sam-dump.vers.h"

/* One entry of the region filter: a name plus a fixed-capacity list of
 * coordinate ranges. */
typedef struct TAlignedRegion_struct {
    /* presumably the reference name the ranges belong to -- TODO confirm,
       the name is not consulted by the code visible in this part of the file */
    char name[1024];
    /* coordinate ranges; AlignRegionFilter treats both ends as inclusive */
    struct {
        INSDC_coord_zero from;
        INSDC_coord_zero to;
    } r[10240];
    /* number of entries of r[] in use (loop bound in AlignRegionFilter) */
    int rq;
    /* NOTE(review): presumably the largest 'to' among the ranges -- confirm */
    INSDC_coord_zero max_to;
} TAlignedRegion;

/* One range of the distance filter; AlignDistanceFilter compares the
 * TEMPLATE_LEN values of an alignment against [from, to], both ends
 * inclusive. */
typedef struct TMatepairDistance_struct {
    uint64_t from;
    uint64_t to;
} TMatepairDistance;

/* Run-time options consulted by the dump routines. */
typedef struct SParam_struct {
    /* NOTE(review): accession and path are not used in the visible part of
       the file; presumably they identify the object being dumped -- confirm */
    const char* accession;
    const char* path;
    /* print REF_SEQ_ID instead of REF_NAME as the reference name */
    bool use_seqid;
    /* also print unaligned reads; when false DumpAlignedSAM clears the
       0x001/0x008/0x040/0x080 flag bits */
    bool unaligned;
    /* NOTE(review): long_cigar, reheader, noheader and hide_identical are not
       used in the visible part of the file -- meaning to be confirmed */
    bool long_cigar;
    bool reheader;
    bool noheader;
    bool hide_identical;
    /* output format selectors: FASTA / FASTQ instead of SAM records */
    bool fasta;
    bool fastq;
    /* append the spot group to the printed read name (see DumpName) */
    bool spot_group_in_name;
    /* optional prefix printed as "<prefix>." in front of every read name */
    const char* name_prefix;
    /* region filter data */
    TAlignedRegion* region;
    uint32_t region_qty;
    /* distance filter data */
    /* accept alignments whose TEMPLATE_LEN is empty or zero */
    bool mp_dist_unknown;
    TMatepairDistance* mp_dist;
    uint32_t mp_dist_qty;
} SParam;

/* Typed views of one pointer to cell data. Cursor_Read stores the address
 * through 'v'; the dump routines read it back through whichever member
 * matches the element type they expect for the column. */
typedef union UData_union {
    const void* v;
    const uint32_t* u32;
    const int32_t* i32;
    const int64_t* i64;
    const uint8_t* u8;
    const char* str;
    INSDC_coord_one* coord1;
    INSDC_coord_zero* coord0;
    INSDC_coord_len* coord_len;
    INSDC_coord_val* coord_val;
    INSDC_SRA_xread_type* read_type;
} UData;

/* Description and current cell of one cursor column. Arrays of SCol are
 * terminated by an entry whose name is NULL. */
typedef struct SCol_struct {
    /* column name passed to VCursorAddColumn */
    const char* name;
    /* column index filled in by VCursorAddColumn; Cursor_Read skips
       entries whose idx is 0 */
    uint32_t idx;
    /* cell data of the row read last (set by Cursor_Read) */
    UData base;
    /* length reported by VCursorCellData for that cell; the dump routines
       use it as the number of elements in 'base' */
    uint32_t len;
    /* when true, Cursor_Init does not fail if the column is not found */
    bool optional;
} SCol;

/* Opens table `name` of `db` for reading.
 *
 * When `optional` is set and the open fails with state rcNotFound, the miss
 * is not treated as an error: *tbl is set to NULL and 0 is returned.
 * Every other result of VDatabaseOpenTableRead is passed through unchanged.
 */
static
rc_t OpenVTable(const VDatabase* db, const VTable** tbl, const char *name, bool optional)
{
    const rc_t open_rc = VDatabaseOpenTableRead(db, tbl, name);

    if( !optional || GetRCState(open_rc) != rcNotFound ) {
        return open_rc;
    }
    *tbl = NULL;
    return 0;
}

/* Creates a cached read cursor on `tbl`, adds every column listed in the
 * NULL-name-terminated array `cols` and opens the cursor.
 *
 * tbl  - table to read; NULL is accepted and yields *curs == NULL, rc 0
 * curs - receives the cursor, or NULL on failure
 * cols - column descriptions; idx of each added column is filled in
 *
 * A column that already exists in the cursor, or an optional column that is
 * not found, is not an error. Any other failure is logged, the cursor is
 * released and the error code is returned.
 */
static
rc_t Cursor_Init(const VTable* tbl, const VCursor** curs, SCol* cols)
{
    rc_t rc;
    SCol* col;

    *curs = NULL;
    if( tbl == NULL ) {
        return 0;
    }

    rc = VTableCreateCachedCursorRead(tbl, curs, 32 * 1024 * 1024);
    for(col = cols; rc == 0 && col->name != NULL; col++) {
        rc = VCursorAddColumn(*curs, &col->idx, col->name);
        if( rc == 0 ) {
            continue;
        }
        if( GetRCState(rc) == rcExists ||
            (GetRCState(rc) == rcNotFound && col->optional) ) {
            /* tolerated: duplicate column or missing optional column */
            rc = 0;
        } else {
            PLOGERR(klogErr, (klogErr, rc, "column $(c)", PLOG_S(c), col->name));
        }
    }
    if( rc == 0 ) {
        rc = VCursorOpen(*curs);
    }
    if( rc != 0 ) {
        VCursorRelease(*curs);
        *curs = NULL;
    }
    return rc;
}

/* Positions `curs` on row `row_id` and fetches the cell of every column in
 * the NULL-name-terminated array `cols` whose idx is non-zero.
 *
 * For each fetched column, base and len are updated to describe the cell.
 * Reading stops at the first failing column; that error code is returned.
 * Columns with idx == 0 (never added to the cursor) are left untouched.
 */
static
rc_t Cursor_Read(const VCursor* curs, int64_t row_id, SCol* cols)
{
    SCol* col;
    rc_t rc = VCursorCloseRow(curs);

    if( rc == 0 ) {
        rc = VCursorSetRowId(curs, row_id);
    }
    if( rc == 0 ) {
        rc = VCursorOpenRow(curs);
    }
    if( rc != 0 ) {
        return rc;
    }
    for(col = cols; col->name != NULL; col++) {
        if( col->idx == 0 ) {
            continue;
        }
        rc = VCursorCellData(curs, col->idx, NULL, &col->base.v, NULL, &col->len);
        if( rc != 0 ) {
            SAM_DUMP_DBG(2, ("%s: read column %s spot %u %R\n", __func__, col->name, row_id, rc));
            break;
        }
    }
    return rc;
}

/* State of the buffered output writer set up by BufferedWriterMake and torn
 * down by BufferedWriterRelease. */
struct {
    /* KOut writer that was installed before ours (from KOutWriterGet);
       non-NULL also marks the buffered writer as already made */
    KWrtWriter writer;
    /* data pointer that belongs to the saved writer (from KOutDataGet) */
    void* data;
    /* file that BufferedWriter writes to */
    KFile* kfile;
    /* offset passed to the next KFileWrite; advanced by the bytes written */
    uint64_t pos;
} g_out_writer = {NULL};

/* KOut writer callback: appends `bufsize` bytes from `buffer` to
 * g_out_writer.kfile at g_out_writer.pos, repeating KFileWrite until
 * everything is written or a write fails.
 *
 * self     - unused; the state lives in g_out_writer
 * buffer   - bytes to write, must not be NULL
 * bufsize  - number of bytes to write
 * num_writ - receives the TOTAL number of bytes written by this call
 *            (it used to receive only the size of the last chunk)
 *
 * Returns the rc of the failing KFileWrite, or 0. If KFileWrite reports
 * success without making progress the loop stops instead of spinning; the
 * short count in *num_writ then tells the caller the write was incomplete.
 */
static
rc_t CC BufferedWriter(void* self, const char* buffer, size_t bufsize, size_t* num_writ)
{
    rc_t rc = 0;
    size_t total = 0;

    assert(buffer != NULL);
    assert(num_writ != NULL);

    do {
        size_t chunk = 0;

        rc = KFileWrite(g_out_writer.kfile, g_out_writer.pos, buffer, bufsize, &chunk);
        if( rc == 0 ) {
            if( chunk == 0 ) {
                /* no progress: leave rather than loop forever */
                break;
            }
            buffer += chunk;
            bufsize -= chunk;
            total += chunk;
            g_out_writer.pos += chunk;
        }
    } while(rc == 0 && bufsize > 0);

    *num_writ = total;
    return rc;
}

/* Redirects KOut output to a buffered (128 KiB) writer on stdout, optionally
 * compressing the stream.
 *
 * gzip  - wrap stdout in a gzip compressor
 * bzip2 - wrap stdout in a bzip2 compressor (mutually exclusive with gzip)
 *
 * The previously installed KOut writer and its data are remembered in
 * g_out_writer so BufferedWriterRelease can restore them. Asking for both
 * compressors, or calling this while a writer is already installed, fails
 * with an rcAmbiguous parameter error. On a later failure whatever file was
 * created so far stays in g_out_writer.kfile.
 */
static
rc_t BufferedWriterMake(bool gzip, bool bzip2)
{
    rc_t rc;
    KFile* wrapped;

    if( gzip && bzip2 ) {
        return RC(rcApp, rcFile, rcConstructing, rcParam, rcAmbiguous);
    }
    if( g_out_writer.writer != NULL ) {
        return RC(rcApp, rcFile, rcConstructing, rcParam, rcAmbiguous);
    }

    rc = KFileMakeStdOut(&g_out_writer.kfile);
    if( rc != 0 ) {
        return rc;
    }
    g_out_writer.pos = 0;

    /* optional compression layer; the wrapper replaces our reference */
    if( gzip ) {
        rc = KFileMakeGzipForWrite(&wrapped, g_out_writer.kfile);
        if( rc != 0 ) {
            return rc;
        }
        KFileRelease(g_out_writer.kfile);
        g_out_writer.kfile = wrapped;
    } else if( bzip2 ) {
        rc = KFileMakeBzip2ForWrite(&wrapped, g_out_writer.kfile);
        if( rc != 0 ) {
            return rc;
        }
        KFileRelease(g_out_writer.kfile);
        g_out_writer.kfile = wrapped;
    }

    /* buffering layer on top of whatever we have so far */
    rc = KBufFileMakeWrite(&wrapped, g_out_writer.kfile, false, 128 * 1024);
    if( rc != 0 ) {
        return rc;
    }
    KFileRelease(g_out_writer.kfile);
    g_out_writer.kfile = wrapped;

    /* remember the current handler, then install ours */
    g_out_writer.writer = KOutWriterGet();
    g_out_writer.data = KOutDataGet();
    return KOutHandlerSet(BufferedWriter, &g_out_writer);
}

/* Undoes BufferedWriterMake: releases the output file and, if a previous
 * KOut writer was saved, installs it again.
 *
 * The file pointer is cleared after the release so that a repeated call, or
 * a stray BufferedWriter call afterwards, sees NULL instead of a stale
 * pointer to an already released file.
 */
static
void BufferedWriterRelease(void)
{
    KFileRelease(g_out_writer.kfile);
    g_out_writer.kfile = NULL;
    if( g_out_writer.writer != NULL ) {
        KOutHandlerSet(g_out_writer.writer, g_out_writer.data);
    }
    g_out_writer.writer = NULL;
    g_out_writer.data = NULL;
}

/* Tree node describing one reference sequence for the "@SQ" header lines.
 * The BSTNode must stay the first member: the tree callbacks cast a
 * BSTNode* straight to RefSeq*. */
typedef struct RefSeq {
    BSTNode node;
    /* NUL-terminated copy of the NAME column; sort key of the tree */
    char name[1024];
    /* NUL-terminated copy of the SEQ_ID column; may be empty */
    char seqid[1024];
    /* sum of SEQ_LEN over the consecutive rows that share this name */
    uint32_t len;
} RefSeq;

/* BSTree ordering callback: orders RefSeq nodes by name using strcmp. */
static
int CC RefSeq_sort( const BSTNode* item, const BSTNode* node )
{
    const RefSeq* lhs = (const RefSeq*)item;
    const RefSeq* rhs = (const RefSeq*)node;

    return strcmp(lhs->name, rhs->name);
}

/* NOTE(review): no user of this type is visible in this part of the file;
 * presumably it pairs a reference row id with the RefSeq node found for
 * it -- confirm against the code that uses it. */
typedef struct RefSeq_rowrange_data_struct {
    int64_t ref_id;
    const RefSeq* node;
} RefSeq_rowrange_data;

/* BSTreeForEach callback: prints the "@SQ" header line of one reference.
 *
 * n    - the RefSeq node
 * data - the SParam in effect
 *
 * SN is the seqid when use_seqid is set and a seqid exists, else the name.
 * When the name was used and a different, non-empty seqid exists, it is
 * added as AS. LN is the accumulated length.
 */
static
void CC RefSeq_dump( BSTNode *n, void *data )
{
    const RefSeq* ref = (RefSeq*)n;
    const SParam* param = (SParam*)data;
    const bool has_seqid = ref->seqid[0] != '\0';
    const bool sn_is_seqid = param->use_seqid && has_seqid;
    const char* sn = sn_is_seqid ? ref->seqid : ref->name;

    OUTMSG(("@SQ SN:%s", sn));
    if( !sn_is_seqid && has_seqid && strcmp(sn, ref->seqid) != 0 ) {
        OUTMSG((" AS:%s", ref->seqid));
    }
    OUTMSG((" LN:%u\n", ref->len));
}

static
rc_t CC DumpRefSeqs(const VTable* tbl, const SParam* param)
{
    rc_t rc = 0;
    const VCursor* curs;
    BSTree tree;

    SCol cols[] = {
        {"NAME", 0, {NULL}, 0, false},
        {"SEQ_ID", 0, {NULL}, 0, false},
        {"SEQ_LEN", 0, {NULL}, 0, false},
        {NULL, 0, {NULL}, 0, false}
    };
    BSTreeInit(&tree);
    if( (rc = Cursor_Init(tbl, &curs, cols)) == 0 ) {
        int64_t start;
        uint64_t count;

        if( (rc = VCursorIdRange(curs, 0, &start, &count)) == 0 ) {
            RefSeq* node = NULL;
            uint32_t last_len = 0;

            while( count > 0 && rc == 0 ) {
                if( (rc = Cursor_Read(curs, start, cols)) == 0 ) {
                    if( node == NULL || last_len != cols[0].len || strncmp(cols[0].base.str, node->name, cols[0].len) != 0 ) {
                        node = malloc(sizeof(*node));
                        if( node == NULL ) {
                            rc = RC(rcExe, rcNode, rcConstructing, rcMemory, rcExhausted);
                        } else if( cols[0].len >= sizeof(node->name) ||
                                   cols[1].len >= sizeof(node->seqid) ) {
                            rc = RC(rcExe, rcString, rcCopying, rcBuffer, rcInsufficient);
                        } else {
                            last_len = cols[0].len;
                            strncpy(node->name, cols[0].base.str, last_len);
                            strncpy(node->seqid, cols[1].base.str, cols[1].len);
                            node->name[last_len] = '\0';
                            node->seqid[cols[1].len] = '\0';
                            node->len = 0;
                            rc = BSTreeInsertUnique(&tree, &node->node, NULL, RefSeq_sort);
                        }
                    }
                    node->len += cols[2].base.coord_len[0];
                } else if( GetRCState(rc) == rcNotFound && GetRCObject(rc) == rcRow ) {
                    /* a gap in ids? */
                    rc = 0;
                }
                start++;
                count--;
            }
        }
        VCursorRelease(curs);
    }
    if( rc == 0 ) {
        BSTreeForEach(&tree, false, RefSeq_dump, (void*)param);
    }
    BSTreeWhack(&tree, NULL, NULL);
    return rc;
}

/* Tree node holding one read group (spot group) name for the "@RG" header
 * lines. The BSTNode must stay the first member: the tree callbacks cast a
 * BSTNode* straight to ReadGroup*. */
typedef struct ReadGroup {
    BSTNode node;
    /* NUL-terminated copy of the SPOT_GROUP value; sort key of the tree */
    char name[1024];
} ReadGroup;

/* BSTree ordering callback: orders ReadGroup nodes by name using strcmp. */
static
int CC ReadGroup_sort( const BSTNode *item, const BSTNode *node )
{
    const ReadGroup* lhs = (const ReadGroup*)item;
    const ReadGroup* rhs = (const ReadGroup*)node;

    return strcmp(lhs->name, rhs->name);
}

/* BSTreeForEach callback: prints the "@RG" header line of one read group.
 * `data` is not used. */
static
void CC ReadGroup_dump( BSTNode *n, void *data )
{
    const ReadGroup* group = (ReadGroup*)n;
    const char* id = group->name;

    OUTMSG(("@RG ID:%s\n", id));
}


static
rc_t CC DumpReadGroupsScan(const VTable* tbl)
{
    rc_t rc = 0;
    const VCursor* curs = NULL;
    BSTree tree;

    SCol cols[] = {
        {"SPOT_GROUP", 0, {NULL}, 0, false},
        {NULL, 0, {NULL}, 0, false}
    };

    BSTreeInit(&tree);
    if( (rc = Cursor_Init(tbl, &curs, cols)) == 0 ) {
        int64_t start;
        uint64_t count;

        if( (rc = VCursorIdRange(curs, 0, &start, &count)) == 0 ) {
            RefSeq* node = NULL;
            uint32_t last_len = 0;

            while( count > 0 && rc == 0 ) {
                if( (rc = Cursor_Read(curs, start, cols)) == 0 && cols[0].len != 0 ) {
                    if( node == NULL || last_len != cols[0].len || strncmp(cols[0].base.str, node->name, cols[0].len) != 0 ) {
                        node = malloc(sizeof(*node));
                        if( node == NULL ) {
                            rc = RC(rcExe, rcNode, rcConstructing, rcMemory, rcExhausted);
                        } else if( cols[0].len > sizeof(node->name) ) {
                            rc = RC(rcExe, rcString, rcCopying, rcBuffer, rcInsufficient);
                        } else {
                            last_len = cols[0].len;
                            strncpy(node->name, cols[0].base.str, last_len);
                            node->name[last_len] = '\0';
                            rc = BSTreeInsertUnique(&tree, &node->node, NULL, ReadGroup_sort);
                            if (GetRCState(rc) == rcExists) {
                                free(node);
                                rc = 0;
                            }
                        }
                    }
                } else if( GetRCState(rc) == rcNotFound && GetRCObject(rc) == rcRow ) {
                    /* a gap in ids? */
                    rc = 0;
                }
                start++;
                count--;
            }
        }
        VCursorRelease(curs);
    }
    if( rc == 0 ) {
        BSTreeForEach(&tree, false, ReadGroup_dump, NULL);
    }
    BSTreeWhack(&tree, NULL, NULL);
    return rc;
}

/* Prints the "@RG" header lines for `tbl`.
 *
 * The read group names are taken from the children of the metadata node
 * "/STATS/SPOT_GROUP"; a child named "default" (compared ignoring case) is
 * not printed. If the metadata lookup ends with state rcNotFound, the names
 * are gathered by scanning the table instead (DumpReadGroupsScan).
 *
 * Returns 0 on success, otherwise the error of the failing step.
 */
rc_t CC DumpReadGroups(const VTable* tbl)
{
    const KMetadata* meta;
    rc_t rc = VTableOpenMetadataRead(tbl, &meta);

    /* try getting list from stats meta */
    if( rc == 0 ) {
        const KMDataNode* stats;

        rc = KMetadataOpenNodeRead(meta, &stats, "/STATS/SPOT_GROUP");
        if( rc == 0 ) {
            KNamelist* groups;

            rc = KMDataNodeListChild(stats, &groups);
            if( rc == 0 ) {
                uint32_t total;

                rc = KNamelistCount(groups, &total);
                if( rc == 0 ) {
                    uint32_t idx = 0;

                    while( rc == 0 && idx < total ) {
                        const char* id;

                        rc = KNamelistGet(groups, idx, &id);
                        if( rc == 0 && strcasecmp(id, "default") != 0 ) {
                            OUTMSG(("@RG ID:%s\n", id));
                        }
                        idx++;
                    }
                }
                KNamelistRelease(groups);
            }
            KMDataNodeRelease(stats);
        }
        KMetadataRelease(meta);
    }
    if( GetRCState(rc) != rcNotFound ) {
        return rc;
    }
    /* no usable metadata: fall back to scanning the table */
    return DumpReadGroupsScan(tbl);
}

/* Indices into the SCol array used with the alignment cursor.
 * The order has to match the order of the entries in that array. */
enum ealg_col {
    alg_SEQ_NAME = 0,
    alg_SAM_FLAGS,
    alg_MAPQ,
    alg_CIGAR,
    alg_MATE_REF_NAME,
    alg_MATE_REF_POS,
    alg_READ,
    alg_SAM_QUALITY,
    alg_SPOT_GROUP,
    alg_SEQ_SPOT_GROUP,
    alg_SEQ_SPOT_ID,
    alg_SEQ_READ_ID,
    alg_EDIT_DISTANCE,
    /* first of the columns needed by the filters: DumpAlignmentList reads
       the array tail starting here before deciding to read the whole row */
    alg_REGION_FILTER,
    alg_REF_NAME = alg_REGION_FILTER,
    alg_REF_SEQ_ID,
    alg_REF_POS,
    alg_REF_LEN,
    /* column examined by AlignDistanceFilter */
    alg_DISTANCE_FILTER,
    alg_TEMPLATE_LEN = alg_DISTANCE_FILTER
};

/* Indices into the SCol array used with the sequence cursor (the cseq_col
 * argument of DumpUnalignedSpot and the cols argument of the DumpUnaligned*
 * routines). The order has to match the order of the entries in that
 * array. */
enum eseq_col {
    seq_READ = 0,
    seq_QUALITY,
    seq_SPOT_GROUP,
    seq_READ_START,
    seq_READ_LEN,
    seq_READ_TYPE,
    seq_NAME
};

/* Writes a read name: "[<name_prefix>.]<name>[<sep><spot_group>]".
 *
 * param          - name_prefix (optional) and spot_group_in_name are used
 * name, name_len - the name bytes (not necessarily NUL-terminated)
 * spot_group_sep - separator put in front of the spot group
 * spot_group, spot_group_len - spot group bytes; appended only when
 *                  spot_group_in_name is set and the length is non-zero
 */
static
void DumpName(const SParam* param, const char* name, size_t name_len,
              const char spot_group_sep, const char* spot_group, size_t spot_group_len)
{
    size_t written;
    const bool append_group = param->spot_group_in_name && spot_group_len > 0;

    if( param->name_prefix != NULL ) {
        OUTMSG(("%s.", param->name_prefix));
    }
    BufferedWriter(NULL, name, name_len, &written);
    if( !append_group ) {
        return;
    }
    BufferedWriter(NULL, &spot_group_sep, 1, &written);
    BufferedWriter(NULL, spot_group, spot_group_len, &written);
}

/* Writes one unaligned read as a FASTA or FASTQ record:
 *
 *     [@|>]QNAME[/read_id] unaligned
 *     SEQ
 *     +            (FASTQ only)
 *     QUAL         (FASTQ only)
 *
 * cols      - sequence columns of the current spot (enum eseq_col)
 * param     - fastq selects FASTQ, otherwise FASTA is written
 * read_id   - appended as "/<read_id>" when greater than zero
 * readStart - offset of the read inside READ and QUALITY
 * readLen   - number of bases (and quality values) of the read
 */
static
void DumpUnalignedFastX(const SCol cols[], const SParam* param, uint32_t read_id,
                        INSDC_coord_zero readStart, INSDC_coord_len readLen)
{
    size_t written;
    const SCol* spot_name = &cols[seq_NAME];
    const SCol* spot_group = &cols[seq_SPOT_GROUP];

    /* record marker */
    if( param->fastq ) {
        BufferedWriter(NULL, "@", 1, &written);
    } else {
        BufferedWriter(NULL, ">", 1, &written);
    }
    /* QNAME: [PFX.]SEQUENCE:NAME[#SPOT_GROUP] */
    DumpName(param, spot_name->base.str, spot_name->len, '#', spot_group->base.str, spot_group->len);
    if( read_id > 0 ) {
        OUTMSG(("/%u", read_id));
    }
    BufferedWriter(NULL, " unaligned\n", 11, &written);

    /* SEQ: SEQUENCE.READ */
    BufferedWriter(NULL, &cols[seq_READ].base.str[readStart], readLen, &written);
    if( param->fastq ) {
        /* QUAL: SEQUENCE.QUALITY */
        BufferedWriter(NULL, "\n+\n", 3, &written);
        BufferedWriter(NULL, &cols[seq_QUALITY].base.str[readStart], readLen, &written);
    }
    BufferedWriter(NULL, "\n", 1, &written);
}

/* Writes one alignment as a FASTA or FASTQ record:
 *
 *     [@|>]QNAME[/read_id] primary|secondary ref=RNAME pos=POS mapq=MAPQ
 *     SEQ
 *     +            (FASTQ only)
 *     QUAL         (FASTQ only)
 *
 * cols    - alignment columns of the current row (enum ealg_col)
 * param   - fastq selects FASTQ; use_seqid selects REF_SEQ_ID as RNAME
 * read_id - appended as "/<read_id>" when greater than zero
 * primary - chooses the word " primary" or " secondary"
 */
static
void DumpAlignedFastX(const SCol cols[], const SParam* param, uint32_t read_id, bool primary)
{
    size_t written;
    /* the alignment's own spot group wins over the one of the sequence */
    const int grp = cols[alg_SPOT_GROUP].len ? alg_SPOT_GROUP : alg_SEQ_SPOT_GROUP;
    const SCol* ref = &cols[param->use_seqid ? alg_REF_SEQ_ID : alg_REF_NAME];

    BufferedWriter(NULL, param->fastq ? "@" : ">", 1, &written);
    /* QNAME: [PFX.]SEQ_NAME[#SPOT_GROUP] */
    DumpName(param, cols[alg_SEQ_NAME].base.str, cols[alg_SEQ_NAME].len, '#', cols[grp].base.str, cols[grp].len);
    if( read_id > 0 ) {
        OUTMSG(("/%u", read_id));
    }
    if( !primary ) {
        BufferedWriter(NULL, " secondary", 10, &written);
    } else {
        BufferedWriter(NULL, " primary", 8, &written);
    }
    /* RNAME: REF_NAME or REF_SEQ_ID */
    BufferedWriter(NULL, " ref=", 5, &written);
    BufferedWriter(NULL, ref->base.str, ref->len, &written);
    /* POS: REF_POS (printed one-based), MAPQ: MAPQ */
    OUTMSG((" pos=%u mapq=%i\n", cols[alg_REF_POS].base.coord0[0] + 1, cols[alg_MAPQ].base.i32[0]));

    /* SEQ: READ */
    BufferedWriter(NULL, cols[alg_READ].base.str, cols[alg_READ].len, &written);
    if( param->fastq ) {
        /* QUAL: SAM_QUALITY */
        BufferedWriter(NULL, "\n+\n", 3, &written);
        BufferedWriter(NULL, cols[alg_SAM_QUALITY].base.str, cols[alg_SAM_QUALITY].len, &written);
    }
    BufferedWriter(NULL, "\n", 1, &written);
}

/* Writes one unaligned read as a SAM record.
 *
 * cols      - sequence columns of the current spot (enum eseq_col)
 * param     - naming options, forwarded to DumpName
 * flags     - value of the FLAG field
 * readStart - offset of the read inside READ and QUALITY
 * readLen   - number of bases (and quality values) of the read
 * rnext, rnext_len - RNEXT text; "*" is printed when rnext_len is 0
 * pnext     - value of the PNEXT field, printed as given
 *
 * RNAME, POS, MAPQ, CIGAR and TLEN are fixed text ("*", 0, 0, "*", 0).
 * An RG:Z tag is added when the spot group is not empty.
 */
static
void DumpUnalignedSAM(const SCol cols[], const SParam* param,
                      uint16_t flags, INSDC_coord_zero readStart, INSDC_coord_len readLen,
                      const char* rnext, uint32_t rnext_len, INSDC_coord_zero pnext)
{
    size_t written;
    const SCol* spot_group = &cols[seq_SPOT_GROUP];
    const bool has_rnext = rnext_len != 0;

    /* QNAME: [PFX.]NAME[.SPOT_GROUP] */
    DumpName(param, cols[seq_NAME].base.str, cols[seq_NAME].len, '.', spot_group->base.str, spot_group->len);

    /* FLAG, then the constant fields, RNEXT, PNEXT and TLEN */
    OUTMSG(("\t%u\t*\t0\t0\t*\t%.*s\t%u\t0\t", (unsigned)flags,
            has_rnext ? rnext_len : 1, has_rnext ? rnext : "*", pnext));
    /* SEQ: SEQUENCE.READ */
    BufferedWriter(NULL, &cols[seq_READ].base.str[readStart], readLen, &written);
    BufferedWriter(NULL, "\t", 1, &written);
    /* QUAL: SEQUENCE.QUALITY */
    BufferedWriter(NULL, &cols[seq_QUALITY].base.str[readStart], readLen, &written);
    /* optional field: read group */
    if( spot_group->len > 0 ) {
        BufferedWriter(NULL, "\tRG:Z:", 6, &written);
        BufferedWriter(NULL, spot_group->base.str, spot_group->len, &written);
    }
    BufferedWriter(NULL, "\n", 1, &written);
}

/* Writes one alignment row as a SAM record.
 *
 * cols  - alignment columns of the current row (enum ealg_col)
 * param - use_seqid selects REF_SEQ_ID as RNAME; when `unaligned` is not
 *         set the flag bits 0x001, 0x008, 0x040 and 0x080 are cleared
 *
 * Fields are written in SAM order: QNAME FLAG RNAME POS MAPQ CIGAR RNEXT
 * PNEXT TLEN SEQ QUAL, followed by an optional RG:Z tag and an NM:i tag.
 * Positions are stored zero-based and printed one-based.
 */
static
void DumpAlignedSAM(const SCol cols[], const SParam* param)
{
    size_t written;
    uint32_t sam_flags;
    /* the alignment's own spot group wins over the one of the sequence */
    const int name_grp = cols[alg_SPOT_GROUP].len ? alg_SPOT_GROUP : alg_SEQ_SPOT_GROUP;
    const SCol* rname = &cols[param->use_seqid ? alg_REF_SEQ_ID : alg_REF_NAME];
    const SCol* mate = &cols[alg_MATE_REF_NAME];
    const SCol* read_group = NULL;

    /* QNAME: [SPOT_GROUP.]SEQ_NAME */
    DumpName(param, cols[alg_SEQ_NAME].base.str, cols[alg_SEQ_NAME].len, '.', cols[name_grp].base.str, cols[name_grp].len);

    /* FLAG: SAM_FLAGS */
    sam_flags = cols[alg_SAM_FLAGS].base.u32[0];
    if( !param->unaligned ) {
        sam_flags &= ~0xC9; /* turn off 0x001 0x008 0x040 0x080 */
    }
    OUTMSG(("\t%u\t", sam_flags));

    /* RNAME: REF_NAME or REF_SEQ_ID */
    BufferedWriter(NULL, rname->base.str, rname->len, &written);
    BufferedWriter(NULL, "\t", 1, &written);

    /* POS: REF_POS */
    OUTMSG(("%u\t", cols[alg_REF_POS].base.coord0[0] + 1));
    /* MAPQ: MAPQ */
    OUTMSG(("%i\t", cols[alg_MAPQ].base.i32[0]));
    /* CIGAR: CIGAR_* */
    BufferedWriter(NULL, cols[alg_CIGAR].base.str, cols[alg_CIGAR].len, &written);
    BufferedWriter(NULL, "\t", 1, &written);

    /* RNEXT: '*' without a mate, '=' for a mate on the same reference,
       MATE_REF_NAME otherwise */
    if( mate->len == 0 ) {
        BufferedWriter(NULL, "*\t", 2, &written);
    } else if( mate->len == cols[alg_REF_NAME].len &&
               memcmp(mate->base.str, cols[alg_REF_NAME].base.str, mate->len) == 0 ) {
        BufferedWriter(NULL, "=\t", 2, &written);
    } else {
        BufferedWriter(NULL, mate->base.str, mate->len, &written);
        BufferedWriter(NULL, "\t", 1, &written);
    }

    /* PNEXT: MATE_REF_POS or 0 */
    if( cols[alg_MATE_REF_POS].len == 0 ) {
        BufferedWriter(NULL, "0\t", 2, &written);
    } else {
        OUTMSG(("%u\t", cols[alg_MATE_REF_POS].base.coord0[0] + 1));
    }

    /* TLEN: TEMPLATE_LEN */
    OUTMSG(("%i\t", cols[alg_TEMPLATE_LEN].base.i32[0]));
    /* SEQ: READ */
    BufferedWriter(NULL, cols[alg_READ].base.str, cols[alg_READ].len, &written);
    BufferedWriter(NULL, "\t", 1, &written);
    /* QUAL: SAM_QUALITY */
    BufferedWriter(NULL, cols[alg_SAM_QUALITY].base.str, cols[alg_SAM_QUALITY].len, &written);

    /* optional field: read group, falling back to the sequence's spot
       group for backward compatibility */
    if( cols[alg_SPOT_GROUP].len > 0 ) {
        read_group = &cols[alg_SPOT_GROUP];
    } else if( cols[alg_SEQ_SPOT_GROUP].len > 0 ) {
        read_group = &cols[alg_SEQ_SPOT_GROUP];
    }
    if( read_group != NULL ) {
        BufferedWriter(NULL, "\tRG:Z:", 6, &written);
        BufferedWriter(NULL, read_group->base.str, read_group->len, &written);
    }

    /* edit distance */
    OUTMSG(("\tNM:i:%i\n", cols[alg_EDIT_DISTANCE].len ? cols[alg_EDIT_DISTANCE].base.i32[0] : 0));
}

/* Prints the unaligned biological reads of one spot, as FASTA/FASTQ or SAM
 * records depending on `param`.
 *
 * param    - output options (fasta, fastq, use_seqid, naming)
 * calg_col - columns of the current alignment row (enum ealg_col), or NULL
 *            when the spot is dumped on its own
 * row_id   - with calg_col == NULL: the sequence row to dump;
 *            otherwise: the id of the current alignment row
 * cseq     - cursor on the sequence table
 * cseq_col - sequence columns (enum eseq_col); cseq_col[col_id] holds one
 *            alignment row id per read, 0 meaning "this read has no
 *            alignment"
 * col_id   - index of that id column inside cseq_col
 * rcount   - optional counter, incremented once per read written
 *
 * With an alignment given, the spot is SEQ_SPOT_ID of that alignment. To
 * print each spot once, nothing is printed if any read of the spot has an
 * alignment id smaller than row_id. The mate fields of the printed records
 * are taken from the alignment.
 *
 * Returns 0 or the error of a failing Cursor_Read. The result of the first
 * read (the id column) is now checked before its cell is used; previously a
 * failure was ignored and whatever the column held from an earlier row was
 * examined.
 */
static
rc_t DumpUnalignedSpot(const SParam* param, const SCol calg_col[], int64_t row_id,
                       const VCursor* cseq, SCol cseq_col[], int col_id, uint64_t* rcount)
{
    rc_t rc = 0;
    uint32_t i, nreads = 0;

    if( calg_col != NULL ) {
        /* get primary alignments only */
        rc = Cursor_Read(cseq, calg_col[alg_SEQ_SPOT_ID].base.i64[0], &cseq_col[col_id]);
        if( rc != 0 ) {
            /* the id cell was not refreshed, so it must not be examined */
            return rc;
        }

        for(i = 0; i < cseq_col[col_id].len; i++) {
            if( cseq_col[col_id].base.i64[i] != 0 ) {
                if( cseq_col[col_id].base.i64[i] < row_id ) {
                    /* unaligned were printed with 1st alignment */
                    return rc;
                }
            } else {
                nreads++;
            }
        }
        /* NOTE(review): nreads counts the reads WITHOUT an alignment here, so
           this test is true when no read of the spot is aligned, although the
           original comment says "all aligned" -- kept as is, confirm intent */
        if( nreads == cseq_col[col_id].len ) {
            /* all aligned */
            return rc;
        }
        row_id = calg_col[alg_SEQ_SPOT_ID].base.i64[0];
    }
    if( (rc = Cursor_Read(cseq, row_id, cseq_col)) == 0 ) {
        /* without a READ_LEN column the spot is treated as a single read */
        nreads = cseq_col[seq_READ_LEN].idx != 0 ? cseq_col[seq_READ_LEN].len : 1;

        for(i = 0; i < nreads; i++) {
            INSDC_coord_zero readStart;
            INSDC_coord_len readLen;

            if( calg_col != NULL && cseq_col[col_id].base.i64[i] != 0 ) {
                /* this read is aligned; it is printed with its alignment */
                continue;
            }
            if( cseq_col[seq_READ_TYPE].idx != 0 && !(cseq_col[seq_READ_TYPE].base.read_type[i] & SRA_READ_TYPE_BIOLOGICAL) ) {
                /* only biological reads are printed */
                continue;
            }
            readLen = cseq_col[seq_READ_LEN].idx ? cseq_col[seq_READ_LEN].base.coord_len[i] : cseq_col[seq_READ].len;
            if( readLen == 0 ) {
                continue;
            }
            readStart = cseq_col[seq_READ_START].idx ? cseq_col[seq_READ_START].base.coord0[i] : 0;
            if( param->fasta || param->fastq) {
                DumpUnalignedFastX(cseq_col, param, nreads > 1 ? i + 1 : 0, readStart, readLen);
            } else {
                if( calg_col == NULL ) {
                    /* SAM flags: 0x4 unmapped, 0x1 when the spot has several
                       reads, 0x40 for the first and 0x80 for the last read */
                    DumpUnalignedSAM(cseq_col, param,
                        4 | (nreads > 1 ? 1 : 0) | (i == 0 ? 0x40 : 0) | (i == nreads - 1 ? 0x80 : 0),
                        readStart, readLen, NULL, 0, 0);
                } else {
                    int c = param->use_seqid ? alg_REF_SEQ_ID : alg_REF_NAME;
                    /* flags derived from the aligned mate: paired + unmapped,
                       mate's 0x10 shifted into 0x20, 0x40/0x80 set to the one
                       the mate does not have, 0x400 copied over */
                    uint16_t flags = 1 | 4 |
                                     ((calg_col[alg_SAM_FLAGS].base.u32[0] & 0x10) << 1) |
                                     ((calg_col[alg_SAM_FLAGS].base.u32[0] & 0x40) ? 0x80 : 0x40) |
                                     (calg_col[alg_SAM_FLAGS].base.u32[0] & 0x400);
                    DumpUnalignedSAM(cseq_col, param, flags, readStart, readLen,
                        calg_col[c].base.str, calg_col[c].len, calg_col[alg_REF_POS].base.coord0[0] + 1);
                }
            }
            if( rcount != NULL ) {
                *rcount += 1;
            }
        }
    }
    return rc;
}

static
bool AlignRegionFilter(const SParam* param, const SCol* cols)
{
    if( cols[alg_REF_NAME].len != 0 || cols[alg_REF_SEQ_ID].len != 0 ) {
        uint32_t i, j, k;

        assert(cols[alg_REF_POS].len == cols[alg_REF_LEN].len);

        for(i = 0; i < param->region_qty; i++) {
            for(j = 0; j < cols[alg_REF_POS].len; j++) {
                for(k = 0; k < param->region[i].rq; k++) {
                    if( !( cols[alg_REF_POS].base.coord0[j] + cols[alg_REF_LEN].base.coord_len[j] < param->region[i].r[k].from ||
                           cols[alg_REF_POS].base.coord0[j] > param->region[i].r[k].to ) ) {
                        return true;
                    }
                }
            }
        }
    }
    return false;
}

/* AlignDistanceFilter
 *  tells whether an alignment row passes the matepair-distance filter
 *
 *  "param" [ IN ] - supplies mp_dist_qty [from:to] distance ranges and the
 *                   mp_dist_unknown flag
 *  "cols"  [ IN ] - alignment columns; TEMPLATE_LEN must already be read
 *
 *  returns true when:
 *   - no distance filter was requested at all, or
 *   - "unknown" was requested and TEMPLATE_LEN is absent or holds a 0, or
 *   - some TEMPLATE_LEN value lies inside one of the requested ranges
 *
 *  The "unknown" test is done independently of the ranges: previously it was
 *  nested in the loop over ranges, so asking for "unknown" alone (no ranges)
 *  never matched rows whose TEMPLATE_LEN was 0.
 *
 *  NOTE(review): the range bounds appear to be unsigned 64-bit (KMain parses
 *  them through a uint64_t*), so a negative TEMPLATE_LEN is converted to a
 *  large unsigned value in the comparison below and will not match ordinary
 *  ranges - confirm whether an absolute distance was intended.
 */
static
bool AlignDistanceFilter(const SParam* param, const SCol* cols)
{
    uint32_t i, j;

    if( param->mp_dist_qty == 0 && !param->mp_dist_unknown ) {
        /* filter not in use */
        return true;
    }
    if( param->mp_dist_unknown ) {
        if( cols[alg_TEMPLATE_LEN].len == 0 ) {
            return true;
        }
        for(j = 0; j < cols[alg_TEMPLATE_LEN].len; j++) {
            if( cols[alg_TEMPLATE_LEN].base.i32[j] == 0 ) {
                return true;
            }
        }
    }
    for(i = 0; i < param->mp_dist_qty; i++) {
        for(j = 0; j < cols[alg_TEMPLATE_LEN].len; j++) {
            if( param->mp_dist[i].from <= cols[alg_TEMPLATE_LEN].base.i32[j] &&
                cols[alg_TEMPLATE_LEN].base.i32[j] <= param->mp_dist[i].to ) {
                return true;
            }
        }
    }
    return false;
}

/* DumpAlignmentList
 *  dumps every alignment whose row id is listed in "ids" and that passes
 *  both the region and the distance filter
 *
 *  "param"    [ IN ] - dump options and filters
 *  "curs"     [ IN ] - cursor on the alignment table the ids refer to
 *  "cols"     [ IN ] - column set bound to "curs"; the filter columns starting
 *                      at alg_REGION_FILTER are read first, the full set only
 *                      for rows that pass the filters
 *  "ids"      [ IN ] - column cell holding the list of alignment row ids
 *  "cseq"     [ IN, NULL OKAY ] - sequence cursor, handed to DumpUnalignedSpot
 *  "cseq_col" [ IN, NULL OKAY ] - columns of "cseq"; when NULL no unaligned
 *                      mates are dumped
 *  "col_id"   [ IN ] - passed through to DumpUnalignedSpot
 *
 *  A missing row ( rcNotFound / rcRow ) on the filter read is treated as a gap
 *  in the ids and skipped. Quitting() is polled after every id.
 */
static
rc_t DumpAlignmentList(const SParam* param, const VCursor* curs, SCol* cols, const SCol* ids,
                       const VCursor* cseq, SCol cseq_col[], int col_id)
{
    rc_t rc = 0;
    uint32_t pos;

    for(pos = 0; rc == 0 && pos < ids->len; ++pos) {
        rc = Cursor_Read(curs, ids->base.i64[pos], &cols[alg_REGION_FILTER]);
        if( rc != 0 ) {
            if( GetRCState(rc) == rcNotFound && GetRCObject(rc) == rcRow ) {
                /* a gap in ids? */
                rc = 0;
            }
        } else if( AlignRegionFilter(param, cols) && AlignDistanceFilter(param, cols) ) {
            /* row passed the filters: now fetch all columns */
            rc = Cursor_Read(curs, ids->base.i64[pos], cols);
            if( rc == 0 ) {
                if( param->fasta || param->fastq ) {
                    DumpAlignedFastX(cols, param, cols[alg_SEQ_READ_ID].base.coord1[0], cseq != NULL);
                } else {
                    DumpAlignedSAM(cols, param);
                }
                if( cseq_col && param->unaligned ) {
                    rc = DumpUnalignedSpot(param, cols, ids->base.i64[pos], cseq, cseq_col, col_id, NULL);
                }
            }
        }
        if( rc == 0 ) {
            rc = Quitting();
        }
    }
    return rc;
}

/* DumpAlignments
 *  dumps primary and (when available) secondary alignments of a database
 *
 *  "param" [ IN ] - dump options and filters
 *  "talgP" [ IN ] - PRIMARY_ALIGNMENT table
 *  "talgS" [ IN ] - SECONDARY_ALIGNMENT table; the resulting cursor is checked
 *                   for NULL before use
 *  "tref"  [ IN ] - REFERENCE table, only used when regions were requested
 *  "tseq"  [ IN ] - SEQUENCE table, only opened when param->unaligned is set
 *
 *  Without regions every row of the primary and then of the secondary table is
 *  visited and dumped if it passes the distance filter. With regions the
 *  "i_name" index of the reference table yields the row range of each
 *  requested reference, and the alignments listed in the PRIMARY_ALIGNMENT_IDS
 *  and SECONDARY_ALIGNMENT_IDS cells of those rows are dumped through
 *  DumpAlignmentList.
 *
 *  Missing rows ( rcNotFound / rcRow ) are treated as gaps. Quitting() is
 *  polled after every row.
 */
static
rc_t DumpAlignments(const SParam* param, const VTable* talgP, const VTable* talgS,
                    const VTable* tref, const VTable* tseq)
{
    rc_t rc = 0;
    const VCursor *calgP = NULL, *calgS = NULL, *cseq = NULL;

    /* see enums above!!! */
    SCol calg_col[] = {
        {"SEQ_NAME", 0, {NULL}, 0, false},
        {"SAM_FLAGS", 0, {NULL}, 0, false},
        {"MAPQ", 0, {NULL}, 0, false},
        {"?CIGAR field column name?", 0, {NULL}, 0, false},
        {"MATE_REF_NAME", 0, {NULL}, 0, false},
        {"MATE_REF_POS", 0, {NULL}, 0, false},
        {"?READ filed column name?", 0, {NULL}, 0, false},
        {"SAM_QUALITY", 0, {NULL}, 0, false},
        {"SPOT_GROUP", 0, {NULL}, 0, true},
        {"SEQ_SPOT_GROUP", 0, {NULL}, 0, true},
        {"SEQ_SPOT_ID", 0, {NULL}, 0, true},
        {"SEQ_READ_ID", 0, {NULL}, 0, true},
        {"EDIT_DISTANCE", 0, {NULL}, 0, false},
        /* these are read before any others for filtering, so they must be last */
        {"REF_NAME", 0, {NULL}, 0, false},
        {"REF_SEQ_ID", 0, {NULL}, 0, false},
        {"REF_POS", 0, {NULL}, 0, false},
        {"REF_LEN", 0, {NULL}, 0, false},
        {"TEMPLATE_LEN", 0, {NULL}, 0, false},
        {NULL, 0, {NULL}, 0, false}
    },
    cseq_col[] = {
        /* APPEND ONLY TO THIS LIST OF COLUMNS!!!
           unless you change enum above and SCol in DumpUnaligned below in sync!! */
        {"READ", 0, {NULL}, 0, false},
        {"(INSDC:quality:text:phred_33)QUALITY", 0, {NULL}, 0, false},
        {"SPOT_GROUP", 0, {NULL}, 0, true},
        {"READ_START", 0, {NULL}, 0, true},
        {"READ_LEN", 0, {NULL}, 0, true},
        {"READ_TYPE", 0, {NULL}, 0, true},
        {"NAME", 0, {NULL}, 0, true},
        {"PRIMARY_ALIGNMENT_ID", 0, {NULL}, 0, true},
        {NULL, 0, {NULL}, 0, false}
    };

    /* the placeholder column names above are resolved from the options */
    if( param->fasta || param->fastq ) {
        calg_col[alg_READ].name = "RAW_READ";
    } else {
        calg_col[alg_READ].name = param->hide_identical? "MISMATCH_READ" : "READ";
    }
    calg_col[alg_CIGAR].name = param->long_cigar ? "CIGAR_LONG" : "CIGAR_SHORT";

    if( (rc = Cursor_Init(talgP, &calgP, calg_col)) == 0 &&
        (rc = Cursor_Init(talgS, &calgS, calg_col)) == 0 &&
        ((param->unaligned && (rc = Cursor_Init(tseq,  &cseq, cseq_col)) == 0) || !param->unaligned) ) {

        int64_t start = 0;
        uint64_t count = 0;

        if( param->region_qty == 0 ) {
            /* no regions: walk all rows of both alignment tables */
            SAM_DUMP_DBG(2, ("%s PRIMARY_ALIGNMENTs\n", param->accession));
            if( rc == 0 && (rc = VCursorIdRange(calgP, 0, &start, &count)) == 0 ) {
                uint64_t rcount = 0;

                SAM_DUMP_DBG(2, ("range from %ld qty %lu\n", start, count));
                while( count > 0 && rc == 0 ) {
                    /* read only the distance filter columns first */
                    if( (rc = Cursor_Read(calgP, start, &calg_col[alg_DISTANCE_FILTER])) == 0 ) {
                        if( AlignDistanceFilter(param, calg_col) ) {
                            if( (rc = Cursor_Read(calgP, start, calg_col)) == 0 ) {
                                if( param->fasta || param->fastq ) {
                                    DumpAlignedFastX(calg_col, param, calg_col[alg_SEQ_READ_ID].base.coord1[0], true);
                                } else {
                                    DumpAlignedSAM(calg_col, param);
                                }
                                if( param->unaligned ) {
                                    rc = DumpUnalignedSpot(param, calg_col, start, cseq, cseq_col, seq_NAME + 1, &rcount);
                                }
                            }
                        }
                    } else if( GetRCState(rc) == rcNotFound && GetRCObject(rc) == rcRow ) {
                        /* a gap in ids? */
                        rc = 0;
                    }
                    start++;
                    count--;
                    ++rcount;
                    rc = rc ? rc : Quitting();
                }
                SAM_DUMP_DBG(2, ("%s: dumped %lu PRIMARY_ALIGNMENTs\n", param->accession, rcount));
            }
            SAM_DUMP_DBG(2, ("%s SECONDARY_ALIGNMENTs\n", param->accession));
            if( calgS != NULL && rc == 0 && (rc = VCursorIdRange(calgS, 0, &start, &count)) == 0 ) {
                uint64_t rcount = 0;

                SAM_DUMP_DBG(2, ("range from %ld qty %lu\n", start, count));
                while( count > 0 && rc == 0 ) {
                    if( (rc = Cursor_Read(calgS, start, &calg_col[alg_DISTANCE_FILTER])) == 0 ) {
                        if( AlignDistanceFilter(param, calg_col) ) {
                            if( (rc = Cursor_Read(calgS, start, calg_col)) == 0 ) {
                                if( param->fasta || param->fastq ) {
                                    DumpAlignedFastX(calg_col, param, calg_col[alg_SEQ_READ_ID].base.coord1[0], false);
                                } else {
                                    DumpAlignedSAM(calg_col, param);
                                }
                            }
                        }
                    } else if( GetRCState(rc) == rcNotFound && GetRCObject(rc) == rcRow ) {
                        /* a gap in ids? */
                        rc = 0;
                    }
                    start++;
                    count--;
                    ++rcount;
                    rc = rc ? rc : Quitting();
                }
                SAM_DUMP_DBG(2, ("%s: dumped %lu SECONDARY_ALIGNMENTs\n", param->accession, rcount));
            }
        } else {
            /* use index to set REF_NAME ranges */
            uint32_t r;
            const KIndex* iname = NULL;
            const VCursor *cref = NULL;

            SCol cref_col[] = {
                {"MAX_SEQ_LEN", 0, {NULL}, 0, false},
                {"PRIMARY_ALIGNMENT_IDS", 0, {NULL}, 0, false},
                {"SECONDARY_ALIGNMENT_IDS", 0, {NULL}, 0, false},
                {NULL, 0, {NULL}, 0, false}
            };
            enum eref_col {
                ref_MAX_SEQ_LEN = 0,
                ref_PRIMARY_ALIGNMENT_IDS,
                ref_SECONDARY_ALIGNMENT_IDS
            };

            if( (rc = VTableOpenIndexRead(tref, &iname, "i_name")) == 0 &&
                (rc = Cursor_Init(tref, &cref, cref_col)) == 0 ) {
                for(r = 0; rc == 0 && r < param->region_qty; r++ ) {
                    if( (rc = KIndexFindText(iname, param->region[r].name, &start, &count, NULL, NULL)) == 0 ) {
                        bool skip_initial = true;
                        uint64_t cur_pos = 0;
                        uint32_t max_seq_len = 0;

                        SAM_DUMP_DBG(2, ("REFERENCE %s index range is [%lu:%lu]\n", param->region[r].name, start, start + count - 1));
                        while( count > 0 && rc == 0 ) {
                            if( (rc = Cursor_Read(cref, start, cref_col)) == 0 ) {
                                if( skip_initial ) {
                                    /* scroll to row with 1st region offset - 1 so alignment tails in the range are not lost */
                                    uint64_t inc = 0;
                                    max_seq_len = cref_col[ref_MAX_SEQ_LEN].base.u32[0];
                                    skip_initial = false;
                                    /* a zero MAX_SEQ_LEN would divide by zero: do not skip in that case */
                                    if( max_seq_len != 0 ) {
                                        inc = param->region[r].r[0].from / max_seq_len;
                                        inc = inc ? inc - 1 : 0;
                                    }
                                    if( inc >= count ) {
                                        /* first requested offset lies past the last row of this
                                           reference; subtracting would wrap the unsigned count
                                           and walk into rows of other references */
                                        break;
                                    }
                                    if( inc != 0 ) {
                                        start += inc;
                                        count -= inc;
                                        cur_pos = max_seq_len * inc;
                                        continue;
                                    }
                                } else if( cur_pos > param->region[r].max_to ) {
                                    /* past the highest requested offset */
                                    break;
                                }
                                SAM_DUMP_DBG(2, ("row %s index range is [%lu:%lu] pos %lu\n",
                                    param->region[r].name, start, start + count - 1, cur_pos));
                                if( (rc = DumpAlignmentList(param, calgP, calg_col, &cref_col[ref_PRIMARY_ALIGNMENT_IDS],
                                                            cseq, cseq_col, seq_NAME + 1)) == 0 ) {
                                    if( calgS != NULL ) {
                                        rc = DumpAlignmentList(param, calgS, calg_col,
                                                               &cref_col[ref_SECONDARY_ALIGNMENT_IDS], NULL, NULL, 0);
                                    }
                                }
                            } else if( GetRCState(rc) == rcNotFound && GetRCObject(rc) == rcRow ) {
                                /* a gap in ids? */
                                rc = 0;
                            }
                            start++;
                            count--;
                            cur_pos += max_seq_len;
                            rc = rc ? rc : Quitting();
                        }
                    } else if( GetRCState(rc) == rcNotFound ) {
                        /* unknown reference name is only a warning */
                        PLOGMSG(klogWarn, (klogWarn, "REFERENCE $(r) not present in data", "r=%s", param->region[r].name));
                        rc = 0;
                    }
                }
            }
            VCursorRelease(cref);
            KIndexRelease(iname);
        }
    }
    VCursorRelease(calgP);
    VCursorRelease(calgS);
    VCursorRelease(cseq);
    return rc;
}

/* DumpUnaligned
 *  dumps the spots of a SEQUENCE table that have no aligned read
 *
 *  "param" [ IN ] - dump options
 *  "tseq"  [ IN ] - sequence table
 *
 *  Two cursors are opened on the table: a light one holding only
 *  PRIMARY_ALIGNMENT_ID, used to decide whether a spot is to be printed, and
 *  the full one handed to DumpUnalignedSpot. When the PRIMARY_ALIGNMENT_ID
 *  column has idx 0 the filter read is skipped and every spot is dumped.
 *
 *  Missing rows ( rcNotFound / rcRow ) are treated as gaps; any other error,
 *  including one returned by DumpUnalignedSpot, stops the loop and is
 *  returned. Quitting() is polled after every row.
 */
static
rc_t DumpUnaligned(const SParam* param, const VTable* tseq)
{
    rc_t rc = 0;
    const VCursor* cflt = NULL, *cseq = NULL;

    SCol cseq_flt[] = {
        {"PRIMARY_ALIGNMENT_ID", 0, {NULL}, 0, true},
        {NULL, 0, {NULL}, 0, false}
    },
    cseq_col[] = {
        /* APPEND ONLY TO THIS LIST OF COLUMNS!!!
           unless you change enum above and SCol in DumpAlignments above in sync!! */
        {"READ", 0, {NULL}, 0, false},
        {"(INSDC:quality:text:phred_33)QUALITY", 0, {NULL}, 0, false},
        {"SPOT_GROUP", 0, {NULL}, 0, true},
        {"READ_START", 0, {NULL}, 0, true},
        {"READ_LEN", 0, {NULL}, 0, true},
        {"READ_TYPE", 0, {NULL}, 0, true},
        {"NAME", 0, {NULL}, 0, true},
        {NULL, 0, {NULL}, 0, false}
    };
    enum eseq_flt {
        seq_PRIMARY_ALIGNMENT_ID = 0
    };

    if( (rc = Cursor_Init(tseq, &cflt, cseq_flt)) == 0 &&
        (rc = Cursor_Init(tseq, &cseq, cseq_col)) == 0 ) {
        int64_t start = 0;
        uint64_t count = 0;
        if( (rc = VCursorIdRange(cseq, 0, &start, &count)) == 0 ) {
            uint64_t rcount = 0;

            SAM_DUMP_DBG(2, ("%s SEQUENCEs without PRIMARY_ALIGNMENT_ID or equal 0\n", param->accession));
            SAM_DUMP_DBG(2, ("range from %ld qty %lu\n", start, count));
            while( count > 0 && rc == 0 ) {
                uint32_t i, nreads = 1;

                /* to avoid reading whole sequence cursor data unnecessarily
                   nreads is used as flag based on PRIMARY_ALIGNMENT_ID column presence and values */
                if( cseq_flt[seq_PRIMARY_ALIGNMENT_ID].idx != 0 ) {
                    if( (rc = Cursor_Read(cflt, start, cseq_flt)) == 0 ) {
                        for(i = 0; i < cseq_flt[seq_PRIMARY_ALIGNMENT_ID].len; i++) {
                            if( cseq_flt[seq_PRIMARY_ALIGNMENT_ID].base.i64[i] != 0 ) {
                                /* skip partially unaligned spots, they are printed
                                   together with aligned read (with lowest alignment id)
                                   in PRIMARY_ALIGNMENT loop */
                                nreads = 0;
                                break;
                            }
                        }
                    }
                }
                if( rc == 0 && nreads != 0 ) {
                    /* keep the result so that a failure is not silently lost */
                    rc = DumpUnalignedSpot(param, NULL, start, cseq, cseq_col, 0, &rcount);
                }
                if( GetRCState(rc) == rcNotFound && GetRCObject(rc) == rcRow ) {
                    /* a gap in ids? */
                    rc = 0;
                }
                start++;
                count--;
                rc = rc ? rc : Quitting();
            }
            SAM_DUMP_DBG(2, ("%s: dumped %lu unaligned reads\n", param->accession, rcount));
        }
    }
    /* both cursors are owned here; the filter cursor used to be leaked */
    VCursorRelease(cflt);
    VCursorRelease(cseq);
    return rc;
}

/* Dump
 *  dumps one run given by param->path
 *
 *  "idx"       [ IN ] - position of this run among the runs being dumped;
 *                       used to print "@HD" only once in a multi run dump
 *  "multi_run" [ IN ] - true when more than one run is dumped into one output
 *  "param"     [ IN, OUT ] - dump options; param->reheader is switched on when
 *                       the database carries no BAM_HEADER metadata node
 *
 *  A path of type database is expected to hold REFERENCE, SEQUENCE and
 *  PRIMARY_ALIGNMENT tables; SECONDARY_ALIGNMENT is opened with the last
 *  OpenVTable argument set to true. The header is either copied from the
 *  BAM_HEADER node or rebuilt, then alignments and - on request - unaligned
 *  spots are dumped.
 *
 *  Any other path is opened as a plain table and dumped as unaligned data,
 *  first without a schema and, if anything in that attempt failed, once more
 *  with the SRA schema.
 */
static
rc_t Dump(uint32_t idx, bool multi_run, SParam* param)
{
    rc_t rc = 0;
    const VDBManager* mgr = NULL;
    const KDBManager* kmgr = NULL;

    if( (rc = VDBManagerMakeRead(&mgr, NULL)) == 0 &&
        (rc = VDBManagerOpenKDBManagerRead(mgr, &kmgr)) == 0 ) {

        int pathType = KDBManagerPathType(kmgr, param->path) & ~kptAlias;
        if( pathType == kptDatabase ) {
            const VDatabase* db = NULL;
            if( (rc = VDBManagerOpenDBRead(mgr, &db, NULL, param->path)) == 0 ) {
                const VTable* seq = NULL, *algP = NULL, *algS = NULL, *ref = NULL;
                if( (rc = OpenVTable(db, &ref, "REFERENCE", false)) == 0 &&
                    (rc = OpenVTable(db, &seq, "SEQUENCE", false)) == 0 &&
                    (rc = OpenVTable(db, &algP, "PRIMARY_ALIGNMENT", false)) == 0 &&
                    (rc = OpenVTable(db, &algS, "SECONDARY_ALIGNMENT", true)) == 0 ) {

                    if( !param->noheader && !param->reheader && !multi_run ) {
                        /* grab header from db meta node */
                        const KMetadata* m;
                        if( (rc = VDatabaseOpenMetadataRead(db, &m)) == 0 ) {
                            const KMDataNode* n;
                            if( (rc = KMetadataOpenNodeRead(m, &n, "BAM_HEADER")) == 0 ) {
                                size_t offset = 0, num_read = 0, remaining = ~0;
                                char buffer[40960];
                                /* last character written; starts as '\n' so that an
                                   empty node does not produce a blank line */
                                char last = '\n';
                                while(rc == 0 && remaining > 0 ) {
                                    if( (rc = KMDataNodeRead(n, offset, buffer, sizeof(buffer),
                                                             &num_read, &remaining)) == 0 ) {
                                        if( num_read == 0 ) {
                                            /* no progress: stop instead of spinning */
                                            break;
                                        }
                                        OUTMSG(("%.*s", ( uint32_t ) num_read, buffer));
                                        last = buffer[num_read - 1];
                                        offset += num_read;
                                    }
                                }
                                /* make sure the header ends with a line break */
                                if( rc == 0 && last != '\n' ) {
                                    OUTMSG(("\n"));
                                }
                                KMDataNodeRelease(n);
                            } else if( GetRCState(rc) == rcNotFound ) {
                                /* no stored header: rebuild it below */
                                param->reheader = true;
                                rc = 0;
                            }
                            KMetadataRelease(m);
                        }
                    }
                    if( rc == 0 && !param->noheader && (param->reheader || multi_run) ) {
                        if( !multi_run || idx == 0 ) {
                            OUTMSG(("@HD VN:1.3\n"));
                        }
                        if( !multi_run && (rc = DumpRefSeqs(ref, param)) == 0 ) {
                            rc = DumpReadGroups(seq);
                        }
                    }
                    if( rc == 0 ) {
                        rc = DumpAlignments(param, algP, algS, ref, seq);
                        if( rc == 0 && param->unaligned ) {
                            rc = DumpUnaligned(param, seq);
                        }
                    }
                }
                VTableRelease(ref);
                VTableRelease(seq);
                VTableRelease(algP);
                VTableRelease(algS);
                VDatabaseRelease(db);
            }
        } else {
            const VTable* seq = NULL;
            VSchema* schema = NULL;

UseLegacy:
            if( (rc = VDBManagerOpenTableRead(mgr, &seq, schema, param->path)) == 0 ) {
                /* honor noheader here too: it is forced for fasta/fastq output,
                   which a SAM header would corrupt */
                if( !param->noheader ) {
                    OUTMSG(("@HD VN:1.3\n"));
                    rc = DumpReadGroups(seq);
                }
                if( rc == 0 ) {
                    rc = DumpUnaligned(param, seq);
                }
                VTableRelease(seq);
            }
            /* NOTE(review): this retry is taken on any failure above, not only
               when opening the table failed - confirm that is intended */
            if( rc != 0 && schema == NULL ) {
                if( (rc = VDBManagerMakeSRASchema(mgr, &schema)) == 0 ) {
                    goto UseLegacy;
                }
            }
            VSchemaRelease(schema);
        }
    }
    VDBManagerRelease(mgr);
    KDBManagerRelease(kmgr);
    return rc;
}

/* KAppVersion
 *  reports the version of this tool as given by SAM_DUMP_VERS
 */
ver_t CC KAppVersion( void )
{
    const ver_t version = SAM_DUMP_VERS;

    return version;
}

/* Help texts of the command line options.
   Each array is a NULL terminated list of lines and is referenced from the
   matching entry of DumpArgs[]. */
const char* seq_id_usage[] = {"Print reference SEQ_ID in RNAME instead of NAME", NULL};
const char* unaligned_usage[] = {"Output unaligned reads", NULL};
const char* cigartype_usage[] = {"Output long version of CIGAR", NULL};
const char* header_usage[] = {"Always reconstruct header", NULL};
const char* noheader_usage[] = {"Do not output headers", NULL};
const char* region_usage[] = {"Filter by position on genome.",
                              "Name can either be file specific name (ex: \"chr1\" or \"1\").",
                              "\"from\" and \"to\" are 1-based coordinates", NULL};
const char* distance_usage[] = {"Filter by distance between matepairs.",
                                "Use \"unknown\" to find matepairs split between the references.",
                                "Use from-to to limit matepair distance on the same reference", NULL};
const char* identicalbases_usage[] = {"Output '=' if base is identical to reference", NULL};
const char* gzip_usage[] = {"Compress output using gzip", NULL};
const char* bzip2_usage[] = {"Compress output using bzip2", NULL};
const char* qname_usage[] = {"Add .SPOT_GROUP to QNAME", NULL};
const char* fasta_usage[] = {"Produce Fasta formatted output", NULL};
const char* fastq_usage[] = {"Produce FastQ formatted output", NULL};
const char* prefix_usage[] = {"Prefix QNAME: prefix.QNAME", NULL};

/* Value placeholders for the help output, one entry per DumpArgs[] element:
   Usage() passes usage_params[i] to HelpOptionLine together with DumpArgs[i],
   so the order here must follow the order of DumpArgs[].
   Entries are NULL for the options that are given no placeholder. */
const char* usage_params[] =
{
    NULL,                   /* seqid */
    NULL,                   /* unaligned */
    NULL,                   /* cigar-long */
    NULL,                   /* header */
    "name[:from-to]",       /* aligned-region */
    "from-to|unknown",      /* matepair-distance */
    NULL,                   /* hide-identical */
    NULL,                   /* gzip */
    NULL,                   /* bzip2 */
    NULL,                   /* spot-group */
    NULL,                   /* no-header */
    NULL,                   /* fastq */
    NULL,                   /* fasta */
    "<prefix>"              /* prefix */
};

enum eArgs {
    earg_seq_id = 0,
    earg_unaligned,
    earg_cigartype,
    earg_header,
    earg_region,
    earg_distance,
    earg_identicalbases,
    earg_gzip,
    earg_bzip2,
    earg_qname,
    earg_noheader,
    earg_fastq,
    earg_fasta,
    earg_prefix
};

/* Command line options of the tool.
   The entries must stay in the order of enum eArgs and of usage_params[],
   both of which are used as indexes into this table.
   Initializer order as used in this file: option name, one-letter alias,
   NULL, help text lines, then a number and two flags
   (NOTE(review): presumably max count / needs value / required - confirm
   against the OptDef declaration).
   Usage() skips entries whose help text is NULL, which hides "fastq" and
   "fasta" from the help output unless NCBI is defined. */
OptDef DumpArgs[] =
{
    {"seqid", "s", NULL, seq_id_usage, 0, false, false},
    {"unaligned", "u", NULL, unaligned_usage, 0, false, false},
    {"cigar-long", "c", NULL, cigartype_usage, 0, false, false},
    {"header", "r", NULL, header_usage, 0, false, false},
    {"aligned-region", NULL, NULL, region_usage, 0, true, false},
    {"matepair-distance", NULL, NULL, distance_usage, 0, true, false},
    {"hide-identical", "=", NULL, identicalbases_usage, 0, false, false},
    {"gzip", NULL, NULL, gzip_usage, 0, false, false},
    {"bzip2", NULL, NULL, bzip2_usage, 0, false, false},
    {"spot-group", "g", NULL, qname_usage, 0, false, false},
    {"no-header", "n", NULL, noheader_usage, 0, false, false},
#ifdef NCBI
    /* each option carries its own help text (they used to be swapped) */
    {"fastq", NULL, NULL, fastq_usage, 0, false, false},
    {"fasta", NULL, NULL, fasta_usage, 0, false, false},
#else
    {"fastq", NULL, NULL, NULL, 0, false, false},
    {"fasta", NULL, NULL, NULL, 0, false, false},
#endif
    {"prefix", "p", NULL, prefix_usage, 0, true, false},
};

/* program name Usage() starts from before it asks ArgsProgram for the real one */
const char UsageDefaultName[] = "sam-dump";

/* UsageSummary
 *  prints the one-line synopsis of the tool
 *
 *  "progname" [ IN ] - program name to show in the synopsis
 *
 *  returns the result of KOutMsg
 */
rc_t CC UsageSummary (const char * progname)
{
    return KOutMsg ( "Usage:\n"
        "\t%s [options] path[ path ...]\n\n", progname );
}


/* Usage
 *  prints the full help: synopsis, every option of DumpArgs[] that has a help
 *  text, the standard options and the version
 *
 *  "args" [ IN ] - parsed arguments, queried for the program name and path
 *
 *  returns the result of ArgsProgram; the help is printed regardless of it
 */
rc_t CC Usage( const Args* args )
{
    const char * progname = UsageDefaultName;
    const char * fullpath = UsageDefaultName;
    const size_t opt_qty = sizeof(DumpArgs) / sizeof(DumpArgs[0]);
    size_t n;
    rc_t rc = ArgsProgram(args, &fullpath, &progname);

    OUTMSG (( "\nUsage:\n"
        "\t%s [options] path[ path ...]\n\n", progname));

    OUTMSG (("Options:\n"));
    for(n = 0; n < opt_qty; ++n) {
        const OptDef* opt = &DumpArgs[n];

        /* options without help text stay hidden */
        if( opt->help == NULL ) {
            continue;
        }
        HelpOptionLine(opt->aliases, opt->name, usage_params[n], opt->help);
    }
    OUTMSG (("\n"));
    HelpOptionsStandard();

    HelpVersion(fullpath, KAppVersion());

    return rc;
}

rc_t CC KMain( int argc, char* argv[] )
{
    rc_t rc = 0;
    Args* args;
    const char* errmsg = "stop";

    memset(&g_out_writer, 0, sizeof(g_out_writer));
    KOutHandlerSetStdOut();
    KStsHandlerSetStdErr();
    KLogHandlerSetStdErr();
    ( void ) KDbgHandlerSetStdErr();

    if( (rc = ArgsMakeAndHandle(&args, argc, argv, 1, DumpArgs, sizeof(DumpArgs)/sizeof(DumpArgs[0]))) == 0 ) {
        SParam params;
        uint32_t pcount, count[sizeof(DumpArgs)/sizeof(DumpArgs[0])];

        memset(&params, 0, sizeof(params));
        if( (rc = ArgsParamCount(args, &pcount)) != 0 || pcount < 1 ) {
            errmsg = "";
            rc = RC(rcExe, rcArgv, rcParsing, rcParam, rcInsufficient);
            MiniUsage(args);
        } else if( (rc = ArgsOptionCount(args, DumpArgs[earg_seq_id].name, &count[earg_seq_id])) != 0 ) {
            errmsg = DumpArgs[earg_seq_id].name;
        } else if( (rc = ArgsOptionCount(args, DumpArgs[earg_unaligned].name, &count[earg_unaligned])) != 0 ) {
            errmsg = DumpArgs[earg_unaligned].name;
        } else if( (rc = ArgsOptionCount(args, DumpArgs[earg_cigartype].name, &count[earg_cigartype])) != 0 ) {
            errmsg = DumpArgs[earg_cigartype].name;
        } else if( (rc = ArgsOptionCount(args, DumpArgs[earg_header].name, &count[earg_header])) != 0 ) {
            errmsg = DumpArgs[earg_header].name;
        } else if( (rc = ArgsOptionCount(args, DumpArgs[earg_region].name, &count[earg_region])) != 0 ) {
            errmsg = DumpArgs[earg_region].name;
        } else if( (rc = ArgsOptionCount(args, DumpArgs[earg_distance].name, &count[earg_distance])) != 0 ) {
            errmsg = DumpArgs[earg_distance].name;
        } else if( (rc = ArgsOptionCount(args, DumpArgs[earg_identicalbases].name, &count[earg_identicalbases])) != 0 ) {
            errmsg = DumpArgs[earg_identicalbases].name;
        } else if( (rc = ArgsOptionCount(args, DumpArgs[earg_gzip].name, &count[earg_gzip])) != 0 ) {
            errmsg = DumpArgs[earg_gzip].name;
        } else if( (rc = ArgsOptionCount(args, DumpArgs[earg_bzip2].name, &count[earg_bzip2])) != 0 ) {
            errmsg = DumpArgs[earg_bzip2].name;
        } else if( (rc = ArgsOptionCount(args, DumpArgs[earg_qname].name, &count[earg_qname])) != 0 ) {
            errmsg = DumpArgs[earg_qname].name;
        } else if( (rc = ArgsOptionCount(args, DumpArgs[earg_noheader].name, &count[earg_noheader])) != 0 ) {
            errmsg = DumpArgs[earg_noheader].name;
        } else if( (rc = ArgsOptionCount(args, DumpArgs[earg_fastq].name, &count[earg_fastq])) != 0 ) {
            errmsg = DumpArgs[earg_fastq].name;
        } else if( (rc = ArgsOptionCount(args, DumpArgs[earg_fasta].name, &count[earg_fasta])) != 0 ) {
            errmsg = DumpArgs[earg_fasta].name;
        } else if( (rc = ArgsOptionCount(args, DumpArgs[earg_prefix].name, &count[earg_prefix])) != 0 || count[earg_prefix] > 1) {
            rc = rc ? rc : RC(rcExe, rcArgv, rcParsing, rcParam, rcExcessive);
            errmsg = DumpArgs[earg_prefix].name;
        } else if( count[earg_prefix] > 0 && (rc = ArgsOptionValue(args, DumpArgs[earg_prefix].name, 0, &params.name_prefix)) != 0 ) {
            errmsg = DumpArgs[earg_prefix].name;
        } else {
            uint32_t p, i;
            const char* arg;

            for(p = 0; rc == 0 && p < count[earg_region]; p++) {
                /* name[:[from][-[to]]] 1-based!!! */
                TAlignedRegion r;

                errmsg = DumpArgs[earg_region].name;
                if( (rc = ArgsOptionValue(args, DumpArgs[earg_region].name, p, &arg)) == 0 ) {
                    const char* c = strchr(arg, ':');
                    if( c == NULL ) {
                        strncpy(r.name, arg, sizeof(r.name));
                        r.rq = 0;
                    } else {
                        INSDC_coord_zero* v;

                        r.r[0].from = (c - arg) > sizeof(r.name) ? sizeof(r.name) : c - arg;
                        strncpy(r.name, arg, r.r[0].from);
                        r.name[r.r[0].from] = '\0';
                        r.rq = 1;
                        r.r[0].from = -1;
                        r.r[0].to = -1;
                        r.max_to = 0;
                        v = &r.r[0].from;
                        while(rc == 0 && *++c != '\0') {
                            if( *c == '-' ) {
                                v = &r.r[0].to;
                            } else if( *c == '+' ) {
                                if( *v != 0 ) {
                                    rc = RC(rcExe, rcArgv, rcProcessing, rcParam, rcOutofrange);
                                }
                            } else if( !isdigit(*c) ) {
                                rc = RC(rcExe, rcArgv, rcProcessing, rcParam, rcOutofrange);
                            } else {
                                if( *v == -1 ) {
                                    *v = 0;
                                }
                                *v = *v * 10 + (*c - '0');
                            }
                        }
                        /* convert to 0-based offset */
                        if( r.r[0].from > 0 ) {
                            r.r[0].from--;
                        } else if( r.r[0].to > 0 ) {
                            r.r[0].from = 0;
                        }
                        if(r.r[0].to > 0 ) {
                            r.r[0].to--;
                        } else if( r.r[0].from >= 0 && r.r[0].to < 0 ) {
                            r.r[0].to = r.r[0].from;
                        }
                        if( r.r[0].from < 0 && r.r[0].to < 0 ) {
                            r.rq = 0;
                        } else if( r.r[0].from > r.r[0].to ) {
                            uint64_t x = r.r[0].from;
                            r.r[0].from = r.r[0].to;
                            r.r[0].to = x;
                        }
                    }
                    if( rc == 0 ) {
                        TAlignedRegion* x = NULL;
                        for(i = 0; i < params.region_qty; i++) {
                            if( strcmp(params.region[i].name, r.name) == 0 ) {
                                x = &params.region[i];
                                break;
                            }
                        }
                        if( x == NULL ) {
                            if( (x = realloc(params.region, sizeof(*params.region) * ++params.region_qty)) == NULL ) {
                                rc = RC(rcExe, rcArgv, rcProcessing, rcMemory, rcExhausted);
                            } else {
                                params.region = x;
                                memcpy(&params.region[params.region_qty - 1], &r, sizeof(r));
                            }
                        } else {
                            int32_t k = x->rq;
                            for(i = 0; i < x->rq; i++) {
                                /* sort by from asc */
                                if( r.r[0].from <= x->r[i].from ) {
                                    k = i;
                                    break;
                                }
                            }
                            if( k >= 0 ) {
                                /* insert at k position */
                                if( x->rq >= sizeof(x->r) / sizeof(x->r[0]) ) {
                                    rc = RC(rcExe, rcArgv, rcProcessing, rcBuffer, rcInsufficient);
                                } else {
                                    memmove(&x->r[k + 1], &x->r[k], sizeof(x->r[0]) * (x->rq - k));
                                    x->r[k].from = r.r[0].from;
                                    x->r[k].to = r.r[0].to;
                                    x->rq++;
                                }
                            }
                        }
                    }
                }
            }
            for(p = 0; p < params.region_qty; p++) {
                SAM_DUMP_DBG(2, ("filter by %s\n", params.region[p].name));
                if( params.region[p].rq == 0 ) {
                    params.region[p].rq = 1;
                    params.region[p].r[0].from = 0;
                    params.region[p].r[0].to = 0x7FFFFFFF;
                }
                for(i = 0; i < params.region[p].rq; i++) {
                    SAM_DUMP_DBG(2, ("   range: [%u:%u]\n", params.region[p].r[i].from, params.region[p].r[i].to));
                    if( params.region[p].max_to < params.region[p].r[i].to ) {
                        params.region[p].max_to = params.region[p].r[i].to;
                    }
                }
            }
            for(p = 0; rc == 0 && p < count[earg_distance]; p++) {
                /* from[-to] | [from]-to | unknown */
                errmsg = DumpArgs[earg_distance].name;
                if( (rc = ArgsOptionValue(args, DumpArgs[earg_distance].name, p, &arg)) != 0 ) {
                } else if( strcasecmp(arg, "unknown") == 0 ) {
                    params.mp_dist_unknown = true;
                } else {
                    TMatepairDistance* p;
                    if( (p = realloc(params.mp_dist, sizeof(*params.mp_dist) * ++params.mp_dist_qty)) == NULL ) {
                        rc = RC(rcExe, rcArgv, rcProcessing, rcMemory, rcExhausted);
                    } else {
                        uint64_t* v;
                        params.mp_dist = p;
                        p = &params.mp_dist[params.mp_dist_qty - 1];
                        p->from = 0;
                        p->to = 0;
                        v = &p->from;
                        while(rc == 0 && *arg != '\0') {
                            if( *arg == '-' ) {
                                v = &p->to;
                            } else if( *arg == '+' ) {
                                if( *v != 0 ) {
                                    rc = RC(rcExe, rcArgv, rcProcessing, rcParam, rcOutofrange);
                                }
                            } else if( !isdigit(*arg) ) {
                                rc = RC(rcExe, rcArgv, rcProcessing, rcParam, rcOutofrange);
                            } else {
                                *v = *v * 10 + (*arg - '0');
                            }
                            arg++;
                        }
                        if( p->from > p->to && p->to != 0 ) {
                            uint64_t x = p->from;
                            p->from = p->to;
                            p->to = x;
                        }
                        if( p->from == 0 && p->to == 0 ) {
                            params.mp_dist_qty--;
                        }
                    }
                }
            }
            for(p = 0; p < params.mp_dist_qty; p++) {
                if( params.mp_dist_unknown ) {
                    SAM_DUMP_DBG(2, ("distance 'unknown'\n"));
                }
                SAM_DUMP_DBG(2, ("distance [%lu-%lu]\n", params.mp_dist[p].from, params.mp_dist[p].to));
            }
            params.use_seqid = (count[earg_seq_id] > 0) || (pcount > 1);
            params.unaligned = count[earg_unaligned] > 0;
            params.long_cigar = count[earg_cigartype] > 0;
            params.reheader = count[earg_header] > 0;
            params.hide_identical = count[earg_identicalbases] > 0;
            params.fasta = count[earg_fasta] > 0;
            params.fastq = count[earg_fastq] > 0;
            params.spot_group_in_name = (count[earg_qname] > 0) || (pcount > 1);
            params.noheader = (count[earg_noheader] > 0) || params.fasta || params.fastq;
            
            if( rc == 0 ) {
                rc = BufferedWriterMake(count[earg_gzip] > 0, count[earg_bzip2] > 0);
            }
            for(p = 0; rc == 0 && p < pcount; p++) {
                if( (rc = ArgsParamValue(args, p, &params.path)) == 0 ) {
                    int i;
                    params.accession = params.path;
                    /* remove trailing /\ */
                    for(i = strlen(params.path) - 1; i >= 0; i--) {
                        if( params.path[i] != '/' && params.path[i] != '\\' ) {
                            break;
                        }
                        ((char*)params.path)[i] = '\0';
                    }
                    /* use last path element as accession */
                    for(i = strlen(params.path) - 1; i >= 0; i--) {
                        if( params.path[i] == '/' || params.path[i] == '\\' ) {
                            params.accession = &params.path[i + 1];
                            break;
                        }
                    }
                    rc = Dump(p, pcount > 1, &params);
                }
            }
            BufferedWriterRelease();

        }
        ArgsWhack(args);
    }
    if( rc != 0 && errmsg[0] ) {
        LOGERR(klogErr, rc, errmsg);
    }
    return rc;
}
