# Load workflow settings; the rules below read keys such as BOLD_TSV, SCHEMA,
# DB_FILE, DB_FILE_INDEXED, INDEXES, LIBS and LOG_LEVEL from `config`.
configfile: "config/config.yml"

# First rule in the file, so Snakemake uses it as the default target. It only
# requests the final table written by output_filtered_data.
rule all:
    input: "results/result_output.tsv"

# Create the database and load BCDM: invokes load_bcdm.pl with the BOLD TSV
# (--tsv), the SQL schema (--sql) and the target database path (--db).
# --force is always passed; stderr is captured in the log.
rule create_load_db:
    output:
        config["DB_FILE"]
    input:
        schema=config["SCHEMA"],
        bold_tsv=config["BOLD_TSV"]
    params:
        log_level=config["LOG_LEVEL"]
    conda:
        "envs/create_load_db.yaml"
    log:
        "logs/create_load_db.log"
    shell:
        "perl workflow/scripts/load_bcdm.pl \
            --tsv {input.bold_tsv} \
            --db {output} \
            --sql {input.schema} \
            --log {params.log_level} \
            --force 2> {log}"

# Import the tab-separated criteria file into the `criteria` table using the
# sqlite3 CLI, then write a flag file that downstream rules depend on.
rule load_criteria:
    input:
        criteria="resources/criteria.tsv",
        db=config["DB_FILE"]
    output:
        "results/criteria_loaded.ok"
    log: "logs/load_criteria.log"
    conda: "envs/sqlite.yaml"
    shell:
        # The stderr redirect must sit on the sqlite3 command line itself: a
        # redirect placed after the heredoc terminator is a separate, empty
        # command and would leave the log empty. The heredoc body and its
        # terminator stay at column 0 because `<<` requires the terminator to
        # start the line.
        """
        sqlite3 {input.db} 2> {log} <<CRITERIA
.mode tabs
.import {input.criteria} criteria
.quit
CRITERIA
        touch {output}
        """

# Apply indexes: pipe the index definitions into the database through the
# sqlite3 CLI and, on success, touch the DB_FILE_INDEXED path as a marker.
rule apply_indexes:
    output:
        config["DB_FILE_INDEXED"]
    input:
        # Flag file from load_criteria; orders this rule after the import.
        criteria_ok="results/criteria_loaded.ok",
        db=config["DB_FILE"],
        indexes=config["INDEXES"]
    conda:
        "envs/sqlite.yaml"
    log:
        "logs/apply_indexes.log"
    shell:
        """
        sqlite3 {input.db} < {input.indexes} 2> {log} && touch {output}
        """

# Rule for producing the object-relational mapping
# XXX: This is not needed until BCDM changes
#rule produce_orm:
#    output:
#        directory="../lib"
#    shell:
#        """
#        dbicdump -o dump_directory=../lib BCDM::ORM 'dbi:SQLite:dbname={config[DB_FILE]}'
#        """

# Load taxonomy: run load_taxonomy.pl against the database (with LIBS on the
# Perl include path) and touch a flag file when it succeeds.
rule load_taxonomy:
    output:
        "results/taxonomy_loaded.ok"
    input:
        # Marker produced by apply_indexes; orders this rule after indexing.
        index_ok=config["DB_FILE_INDEXED"],
        db=config["DB_FILE"]
    params:
        libs=config["LIBS"],
        log_level=config["LOG_LEVEL"]
    conda:
        "envs/load_taxonomy.yaml"
    log:
        "logs/load_taxonomy.log"
    shell:
        """
        perl -I{params.libs} workflow/scripts/load_taxonomy.pl \
            --db {input.db} \
            --log {params.log_level} 2> {log} && touch {output}
        """

# Rule for importing the target list and mapping to taxonomy
# XXX: This is not needed until the target list export is ready
# rule import_target_list_perl:
#     input:
#         perl_script="scripts/load_targetlist.pl",
#         targetlist="../resources/all_specs_and_syn.csv",
#         db="../results/bold.db"
#     output:
#         target="target_loaded.txt"
#     shell:
#         """
#         perl {input.perl_script} \\
#             --list {input.targetlist} \\
#             --db {input.db} \\
#             --log KINGDOM \\
#             --project {config[PROJECT_NAME]} \\
#             --taxon {config[TAXON_LEVEL]} \\
#             --kingdom {config[KINGDOM]}
#         """

# Rules for assessing criteria
# COLLECTION_DATE: run assess_criteria.pl for this one criterion. The script's
# stdout becomes the TSV; its stderr goes to the log.
rule COLLECTION_DATE:
    output:
        tsv="results/assessed_COLLECTION_DATE.tsv"
    input:
        # NOTE(review): hard-coded path, while the database-building rules use
        # config["DB_FILE"] -- confirm the two agree.
        db="results/bold.db",
        # Flag file from load_taxonomy; orders this rule after it.
        taxonomy_ok="results/taxonomy_loaded.ok"
    params:
        criterion="COLLECTION_DATE",
        libs=config["LIBS"],
        log_level=config["LOG_LEVEL"]
    conda:
        "envs/assess_criteria.yaml"
    log:
        "logs/assess_COLLECTION_DATE.log"
    shell:
        """
        perl -I{params.libs} workflow/scripts/assess_criteria.pl \
            --db {input.db} \
            --log {params.log_level} \
            --criteria {params.criterion} \
            2> {log} > {output.tsv}
        """

# COLLECTORS: run assess_criteria.pl for this one criterion. The script's
# stdout becomes the TSV; its stderr goes to the log.
rule COLLECTORS:
    output:
        tsv="results/assessed_COLLECTORS.tsv"
    input:
        # NOTE(review): hard-coded path, while the database-building rules use
        # config["DB_FILE"] -- confirm the two agree.
        db="results/bold.db",
        # Flag file from load_taxonomy; orders this rule after it.
        taxonomy_ok="results/taxonomy_loaded.ok"
    params:
        criterion="COLLECTORS",
        libs=config["LIBS"],
        log_level=config["LOG_LEVEL"]
    conda:
        "envs/assess_criteria.yaml"
    log:
        "logs/assess_COLLECTORS.log"
    shell:
        """
        perl -I{params.libs} workflow/scripts/assess_criteria.pl \
            --db {input.db} \
            --log {params.log_level} \
            --criteria {params.criterion} \
            2> {log} > {output.tsv}
        """

# COORD
rule COORD:
    # Runs assess_criteria.pl with --criteria COORD; stdout becomes the TSV,
    # stderr goes to the log.
    output:
        tsv="results/assessed_COORD.tsv"
    input:
        # NOTE(review): hard-coded path, while the database-building rules use
        # config["DB_FILE"] -- confirm the two agree.
        db="results/bold.db",
        # Flag file from load_taxonomy; orders this rule after it.
        taxonomy_ok="results/taxonomy_loaded.ok"
    params:
        criterion="COORD",
        libs=config["LIBS"],
        log_level=config["LOG_LEVEL"]
    conda:
        "envs/assess_criteria.yaml"
    log:
        "logs/assess_COORD.log"
    shell:
        """
        perl -I{params.libs} workflow/scripts/assess_criteria.pl \
            --db {input.db} \
            --log {params.log_level} \
            --criteria {params.criterion} \
            2> {log} > {output.tsv}
        """

# COUNTRY: run assess_criteria.pl for this one criterion. The script's stdout
# becomes the TSV; its stderr goes to the log.
rule COUNTRY:
    input:
        # NOTE(review): hard-coded path, while the database-building rules use
        # config["DB_FILE"] -- confirm the two agree.
        db="results/bold.db",
        # Flag file from load_taxonomy; orders this rule after it.
        taxonomy_ok="results/taxonomy_loaded.ok"
    params:
        log_level=config['LOG_LEVEL'],
        libs=config["LIBS"],
        criterion="COUNTRY"
    output:
        # Must be unique to this rule: it previously pointed at the COORD
        # rule's file, so both rules claimed the same output.
        tsv="results/assessed_COUNTRY.tsv"
    log: "logs/assess_COUNTRY.log"
    conda: "envs/assess_criteria.yaml"
    shell:
        """
        perl -I{params.libs} workflow/scripts/assess_criteria.pl \
            --db {input.db} \
            --log {params.log_level} \
            --criteria {params.criterion} \
            2> {log} > {output.tsv}
        """

# ID_METHOD: run assess_criteria.pl for this one criterion. The script's
# stdout becomes the TSV; its stderr goes to the log.
rule ID_METHOD:
    output:
        tsv="results/assessed_ID_METHOD.tsv"
    input:
        # NOTE(review): hard-coded path, while the database-building rules use
        # config["DB_FILE"] -- confirm the two agree.
        db="results/bold.db",
        # Flag file from load_taxonomy; orders this rule after it.
        taxonomy_ok="results/taxonomy_loaded.ok"
    params:
        criterion="ID_METHOD",
        libs=config["LIBS"],
        log_level=config["LOG_LEVEL"]
    conda:
        "envs/assess_criteria.yaml"
    log:
        "logs/assess_ID_METHOD.log"
    shell:
        """
        perl -I{params.libs} workflow/scripts/assess_criteria.pl \
            --db {input.db} \
            --log {params.log_level} \
            --criteria {params.criterion} \
            2> {log} > {output.tsv}
        """


# IDENTIFIER: run assess_criteria.pl for this one criterion. The script's
# stdout becomes the TSV; its stderr goes to the log.
rule IDENTIFIER:
    output:
        tsv="results/assessed_IDENTIFIER.tsv"
    input:
        # NOTE(review): hard-coded path, while the database-building rules use
        # config["DB_FILE"] -- confirm the two agree.
        db="results/bold.db",
        # Flag file from load_taxonomy; orders this rule after it.
        taxonomy_ok="results/taxonomy_loaded.ok"
    params:
        criterion="IDENTIFIER",
        libs=config["LIBS"],
        log_level=config["LOG_LEVEL"]
    conda:
        "envs/assess_criteria.yaml"
    log:
        "logs/assess_IDENTIFIER.log"
    shell:
        """
        perl -I{params.libs} workflow/scripts/assess_criteria.pl \
            --db {input.db} \
            --log {params.log_level} \
            --criteria {params.criterion} \
            2> {log} > {output.tsv}
        """

# INSTITUTION
rule INSTITUTION:
    # Runs assess_criteria.pl with --criteria INSTITUTION; stdout becomes the
    # TSV, stderr goes to the log.
    output:
        tsv="results/assessed_INSTITUTION.tsv"
    input:
        # NOTE(review): hard-coded path, while the database-building rules use
        # config["DB_FILE"] -- confirm the two agree.
        db="results/bold.db",
        # Flag file from load_taxonomy; orders this rule after it.
        taxonomy_ok="results/taxonomy_loaded.ok"
    params:
        criterion="INSTITUTION",
        libs=config["LIBS"],
        log_level=config["LOG_LEVEL"]
    conda:
        "envs/assess_criteria.yaml"
    log:
        "logs/assess_INSTITUTION.log"
    shell:
        """
        perl -I{params.libs} workflow/scripts/assess_criteria.pl \
            --db {input.db} \
            --log {params.log_level} \
            --criteria {params.criterion} \
            2> {log} > {output.tsv}
        """

# MUSEUM_ID: run assess_criteria.pl for this one criterion. The script's
# stdout becomes the TSV; its stderr goes to the log.
rule MUSEUM_ID:
    output:
        tsv="results/assessed_MUSEUM_ID.tsv"
    input:
        # NOTE(review): hard-coded path, while the database-building rules use
        # config["DB_FILE"] -- confirm the two agree.
        db="results/bold.db",
        # Flag file from load_taxonomy; orders this rule after it.
        taxonomy_ok="results/taxonomy_loaded.ok"
    params:
        criterion="MUSEUM_ID",
        libs=config["LIBS"],
        log_level=config["LOG_LEVEL"]
    conda:
        "envs/assess_criteria.yaml"
    log:
        "logs/assess_MUSEUM_ID.log"
    shell:
        """
        perl -I{params.libs} workflow/scripts/assess_criteria.pl \
            --db {input.db} \
            --log {params.log_level} \
            --criteria {params.criterion} \
            2> {log} > {output.tsv}
        """


# PUBLIC_VOUCHER: run assess_criteria.pl for this one criterion. The script's
# stdout becomes the TSV; its stderr goes to the log.
rule PUBLIC_VOUCHER:
    output:
        tsv="results/assessed_PUBLIC_VOUCHER.tsv"
    input:
        # NOTE(review): hard-coded path, while the database-building rules use
        # config["DB_FILE"] -- confirm the two agree.
        db="results/bold.db",
        # Flag file from load_taxonomy; orders this rule after it.
        taxonomy_ok="results/taxonomy_loaded.ok"
    params:
        criterion="PUBLIC_VOUCHER",
        libs=config["LIBS"],
        log_level=config["LOG_LEVEL"]
    conda:
        "envs/assess_criteria.yaml"
    log:
        "logs/assess_PUBLIC_VOUCHER.log"
    shell:
        """
        perl -I{params.libs} workflow/scripts/assess_criteria.pl \
            --db {input.db} \
            --log {params.log_level} \
            --criteria {params.criterion} \
            2> {log} > {output.tsv}
        """

# SEQ_QUALITY: run assess_criteria.pl for this one criterion. The script's
# stdout becomes the TSV; its stderr goes to the log.
rule SEQ_QUALITY:
    output:
        tsv="results/assessed_SEQ_QUALITY.tsv"
    input:
        # NOTE(review): hard-coded path, while the database-building rules use
        # config["DB_FILE"] -- confirm the two agree.
        db="results/bold.db",
        # Flag file from load_taxonomy; orders this rule after it.
        taxonomy_ok="results/taxonomy_loaded.ok"
    params:
        criterion="SEQ_QUALITY",
        libs=config["LIBS"],
        log_level=config["LOG_LEVEL"]
    conda:
        "envs/assess_criteria.yaml"
    log:
        "logs/assess_SEQ_QUALITY.log"
    shell:
        """
        perl -I{params.libs} workflow/scripts/assess_criteria.pl \
            --db {input.db} \
            --log {params.log_level} \
            --criteria {params.criterion} \
            2> {log} > {output.tsv}
        """


# SITE: run assess_criteria.pl for this one criterion. The script's stdout
# becomes the TSV; its stderr goes to the log.
rule SITE:
    output:
        tsv="results/assessed_SITE.tsv"
    input:
        # NOTE(review): hard-coded path, while the database-building rules use
        # config["DB_FILE"] -- confirm the two agree.
        db="results/bold.db",
        # Flag file from load_taxonomy; orders this rule after it.
        taxonomy_ok="results/taxonomy_loaded.ok"
    params:
        criterion="SITE",
        libs=config["LIBS"],
        log_level=config["LOG_LEVEL"]
    conda:
        "envs/assess_criteria.yaml"
    log:
        "logs/assess_SITE.log"
    shell:
        """
        perl -I{params.libs} workflow/scripts/assess_criteria.pl \
            --db {input.db} \
            --log {params.log_level} \
            --criteria {params.criterion} \
            2> {log} > {output.tsv}
        """

# SPECIES_ID: run assess_criteria.pl for this one criterion. The script's
# stdout becomes the TSV; its stderr goes to the log.
rule SPECIES_ID:
    output:
        tsv="results/assessed_SPECIES_ID.tsv"
    input:
        # NOTE(review): hard-coded path, while the database-building rules use
        # config["DB_FILE"] -- confirm the two agree.
        db="results/bold.db",
        # Flag file from load_taxonomy; orders this rule after it.
        taxonomy_ok="results/taxonomy_loaded.ok"
    params:
        criterion="SPECIES_ID",
        libs=config["LIBS"],
        log_level=config["LOG_LEVEL"]
    conda:
        "envs/assess_criteria.yaml"
    log:
        "logs/assess_SPECIES_ID.log"
    shell:
        """
        perl -I{params.libs} workflow/scripts/assess_criteria.pl \
            --db {input.db} \
            --log {params.log_level} \
            --criteria {params.criterion} \
            2> {log} > {output.tsv}
        """

# TYPE_SPECIMEN: run assess_criteria.pl for this one criterion. The script's
# stdout becomes the TSV; its stderr goes to the log.
rule TYPE_SPECIMEN:
    output:
        tsv="results/assessed_TYPE_SPECIMEN.tsv"
    input:
        # NOTE(review): hard-coded path, while the database-building rules use
        # config["DB_FILE"] -- confirm the two agree.
        db="results/bold.db",
        # Flag file from load_taxonomy; orders this rule after it.
        taxonomy_ok="results/taxonomy_loaded.ok"
    params:
        criterion="TYPE_SPECIMEN",
        libs=config["LIBS"],
        log_level=config["LOG_LEVEL"]
    conda:
        "envs/assess_criteria.yaml"
    log:
        "logs/assess_TYPE_SPECIMEN.log"
    shell:
        """
        perl -I{params.libs} workflow/scripts/assess_criteria.pl \
            --db {input.db} \
            --log {params.log_level} \
            --criteria {params.criterion} \
            2> {log} > {output.tsv}
        """

# HAS_IMAGE: unlike the other assessment rules, this one calls a dedicated
# script (assess_images.pl) with its own conda environment and takes no
# --criteria argument. stdout becomes the TSV; stderr goes to the log.
rule HAS_IMAGE:
    output:
        tsv="results/assessed_HAS_IMAGE.tsv"
    input:
        # NOTE(review): hard-coded path, while the database-building rules use
        # config["DB_FILE"] -- confirm the two agree.
        db="results/bold.db",
        # Flag file from load_taxonomy; orders this rule after it.
        taxonomy_ok="results/taxonomy_loaded.ok"
    params:
        libs=config["LIBS"],
        log_level=config["LOG_LEVEL"]
    conda:
        "envs/assess_images.yaml"
    log:
        "logs/assess_HAS_IMAGE.log"
    shell:
        """
        perl -I{params.libs} workflow/scripts/assess_images.pl \
            --db {input.db} \
            --log {params.log_level} \
            2> {log} > {output.tsv}
        """


# Merge the per-criterion assessment tables: concat_tsvs.pl receives every
# assessed_*.tsv as a positional argument and its stdout is saved as the
# combined table. Inputs are taken from the assessment rules' declared outputs.
rule concatenate:
    output:
        concat="results/CONCATENATED.tsv"
    input:
        collection_date=rules.COLLECTION_DATE.output.tsv,
        collectors=rules.COLLECTORS.output.tsv,
        coord=rules.COORD.output.tsv,
        country=rules.COUNTRY.output.tsv,
        id_method=rules.ID_METHOD.output.tsv,
        identifier=rules.IDENTIFIER.output.tsv,
        institution=rules.INSTITUTION.output.tsv,
        museum_id=rules.MUSEUM_ID.output.tsv,
        public_voucher=rules.PUBLIC_VOUCHER.output.tsv,
        seq_quality=rules.SEQ_QUALITY.output.tsv,
        site=rules.SITE.output.tsv,
        species_id=rules.SPECIES_ID.output.tsv,
        type_specimen=rules.TYPE_SPECIMEN.output.tsv,
        has_image=rules.HAS_IMAGE.output.tsv
    shell:
        """
        perl workflow/scripts/concat_tsvs.pl \
            {input.collection_date} \
            {input.collectors} \
            {input.coord} \
            {input.country} \
            {input.id_method} \
            {input.identifier} \
            {input.institution} \
            {input.museum_id} \
            {input.public_voucher} \
            {input.seq_quality} \
            {input.site} \
            {input.species_id} \
            {input.type_specimen} \
            {input.has_image} \
            > {output.concat}
        """

# Import the combined assessment table into the `bold_criteria` table via the
# sqlite3 CLI, then touch a flag file for output_filtered_data to depend on.
rule import_concatenated:
    output:
        "results/concatenated_imported.ok"
    input:
        # NOTE(review): hard-coded path, while the database-building rules use
        # config["DB_FILE"] -- confirm the two agree.
        db="results/bold.db",
        concat="results/CONCATENATED.tsv"
    log:
        "logs/import_concatenated.log"
    conda:
        "envs/sqlite.yaml"
    shell:
        # The heredoc body and terminator stay at column 0 because `<<`
        # requires the terminator to start the line.
        """
sqlite3 {input.db} 2> {log} <<IMPORT
.mode tabs
.import {input.concat} bold_criteria
.quit
IMPORT
touch {output}
        """

# Rule for outputting filtered data in BCDM: runs the pivot query through the
# sqlite3 CLI and writes the result, with a header row, as a tab-separated file.
rule output_filtered_data:
    input:
        # NOTE(review): hard-coded path, while the database-building rules use
        # config["DB_FILE"] -- confirm the two agree.
        db="results/bold.db",
        # Flag file from import_concatenated; orders this rule after it.
        import_ok="results/concatenated_imported.ok",
        # Declared as an input so that edits to the query trigger a re-run and
        # a missing file is reported when the DAG is built.
        pivot_sql="workflow/scripts/pivot.sql"
    output:
        "results/result_output.tsv"
    conda: "envs/sqlite.yaml"
    log: "logs/output_filtered_data.log"
    shell:
        # The heredoc body and terminator stay at column 0 because `<<`
        # requires the terminator to start the line.
        """
        sqlite3 {input.db} 2> {log} <<EOF
.headers ON
.mode tabs
.output {output}
.read {input.pivot_sql}
.quit
EOF
        """




