# =============================================================================
# Snakefile — comprehensive feature test for Snakemake highlighting
# Minimal comments; lines marked `# ERROR:` are intentionally invalid.
# =============================================================================

# ---------- Top-level Python ----------

# Both branch headers carry extra text after the colon and are still followed by an indented body.
if True: forbidden # ERROR: no text allowed after block opener
    a = 1
else: forbidden # ERROR: no text allowed after block opener
    a = 0

# Inline workflow settings bound to the module-level name `config`.
config = dict(
    method="B",
    samples=["S1", "S2"],
    ref="ref/genome.fa",
    gtf="ref/genes.gtf",
    threads_default=4,
    work="work",
    adapters=dict(fwd="AGATCGGAAGAGC", rev="AGATCGGAAGAGC"),
)
# Sample identifiers; the same list object that is stored under config["samples"].
SAMPLES = config["samples"]

def fq(sample, mate):
    """Return the raw FASTQ path for `sample` and read `mate`: raw/<sample>_R<mate>.fastq.gz."""
    return "raw/{}_R{}.fastq.gz".format(sample, mate)

# ---------- Global Snakemake directives ----------
# Working directory comes from the inline config ("work").
workdir: config["work"]
localrules: all, qc_fastqc
ruleorder: sort_index > align
report: "report/report.html"
envvars: "LD_LIBRARY_PATH", "OMP_NUM_THREADS"
# Restrict the `sample` wildcard to ASCII letters, digits and underscores.
wildcard_constraints:
    sample = r"[A-Za-z0-9_]+"

include: "extras.smk"
container: "docker://ubuntu:22.04"
conda: "envs/global.yml"
# NOTE(review): `containerized:` is given no value and is not marked `# ERROR:` — confirm this is intentional.
containerized:

# Legacy/compat directives still recognized in old workflows
# NOTE(review): `moduleinclude` is not marked `# ERROR:` — confirm it is a keyword the highlighter should accept.
moduleinclude: "legacy/tools.smk"   # legacy include for modules
# `subworkflow` is deprecated (kept for legacy)
subworkflow oldwf:  something # ERROR: nothing allowed after colon
    workdir: "oldwf"
    snakefile: "workflow/Snakefile"
    input: # ERROR: `input` directive not allowed in subworkflows

# ---------- Modules and use rule ----------
# Module `asm` loaded from modules/assembly.smk; the header line and the `input` line are deliberately invalid.
module asm: something # ERROR: nothing allowed after colon
    snakefile: "modules/assembly.smk"
    config: "modules/assembly.yaml"
    input: "test" # ERROR: `input` directive not allowed in modules

# Reuse a rule from the module; inside `with:` we use regular rule directives
# NOTE(review): the statement has no `from asm` clause — confirm whether the module form
# (`use rule ... from asm as ...`) was intended.
use rule assemble as assemble_mod with: something # ERROR: nothing allowed after colon
    snakefile: # ERROR: `snakefile` directive not allowed in rules
    threads: 8
    params:
        mode = "quick"
    message:
        "Assembling {wildcards.sample} (module override)"
    # misspelled directive (kept to test error handling inside a use-body)
    outpt: "SHOULD-NOT-BE-HERE"   # ERROR: typo

# ---------- INTENTIONAL TOP-LEVEL ERRORS ----------
workdirr: "typo/dir"              # ERROR: unknown top-level keyword (misspelled `workdir`)
snakefile: "top/level.smk"        # ERROR: `snakefile` is only valid inside module/subworkflow

# ---------- Pipeline ----------
# Aggregate target: requests one results/<sample>/summary.txt per entry in SAMPLES.
rule all:
    input:
        expand("results/{sample}/summary.txt", sample=SAMPLES)

# Runs fastqc on both raw mates of a sample; stdout and stderr go to the rule log.
rule qc_fastqc:
    input:
        # Raw read paths are resolved per sample through fq().
        r1 = lambda wc: fq(wc.sample, 1),
        r2 = lambda wc: fq(wc.sample, 2),
    output:
        html = "qc/{sample}_fastqc.html",
        zip  = "qc/{sample}_fastqc.zip",
    threads: 2
    resources:
        mem_mb = 1024
    log:
        "log/fastqc_{sample}.log"
    params:
        extra = "--nogroup"
    shell:
        """
        fastqc -t {threads} {params.extra} -o qc {input.r1} {input.r2} > {log} 2>&1
        """

# Runs cutadapt on a raw read pair with the adapters from config["adapters"];
# stdout and stderr go to the rule log.
rule trim_cutadapt:
    input:
        # Raw read paths are resolved per sample through fq().
        r1 = lambda wc: fq(wc.sample, 1),
        r2 = lambda wc: fq(wc.sample, 2),
    output:
        r1 = "trim/{sample}_R1.fastq.gz", # `sample`: wildcard
        r2 = "trim/{sample}_R2.fastq.gz",
        # NOTE(review): `report_dir` is not defined anywhere in this file (possibly in extras.smk — confirm),
        # and the shell command below never references {output.report}.
        report = f"{report_dir}/{{sample}}" # `report_dir`: f-string interpolation, `sample`: wildcard
    params:
        # Forward adapter is passed to -a, reverse adapter to -A.
        a = config["adapters"]["fwd"],
        A = config["adapters"]["rev"],
    threads: 8
    conda:
        "envs/cutadapt.yml"
    log:
        "log/cutadapt_{sample}.log"
    shell:
        """
        cutadapt -j {threads} -a {params.a} -A {params.A} \
            -o {output.r1} -p {output.r2} {input.r1} {input.r2} > {log} 2>&1
        """

# Example of wrapper usage (version string illustrative)
# Takes the trimmed read pair plus config["ref"] and produces an unsorted BAM.
# The wrapper is the rule's only execution directive: Snakemake rejects a rule that
# combines more than one of run/shell/script/notebook/wrapper.
rule align:
    input:
        r1 = "trim/{sample}_R1.fastq.gz",
        r2 = "trim/{sample}_R2.fastq.gz",
        ref = config["ref"],
    output:
        bam = "map/{sample}.unsorted.bam",
    threads: 12
    resources:
        mem_mb = 8000
    params:
        # BWA-MEM2 example options
        extra = "-M"
    log:
        "log/align_{sample}.log"
    wrapper:
        "0.90.0/bio/bwa/mem2"

# Sorts the unsorted BAM with samtools and then indexes the sorted file.
rule sort_index:
    input:
        "map/{sample}.unsorted.bam"
    output:
        bam = "map/{sample}.bam",
        bai = "map/{sample}.bam.bai",
    threads: 6
    resources:
        mem_mb = 4000
    envmodules:
        "samtools/1.16"
    shadow:
        "minimal"
    shell:
        """
        samtools sort -@ {threads} -o {output.bam} {input}
        samtools index -@ {threads} {output.bam}
        """

# Runs featureCounts on the sorted BAM using the annotation at config["gtf"].
rule quantify:
    input:
        bam = "map/{sample}.bam",
        # The index is declared as an input but is not passed to the command.
        bai = "map/{sample}.bam.bai",
        gtf = config["gtf"],
    output:
        counts = "counts/{sample}.txt",
    threads: 4
    group:
        "counting"
    priority:
        50
    params:
        # Passed to featureCounts as -t and -g respectively.
        feature_type = "exon",
        id_attr = "gene_id",
    shell:
        """
        featureCounts -T {threads} -a {input.gtf} -t {params.feature_type} -g {params.id_attr} \
            -o {output.counts} {input.bam}
        """

# Example of script & notebook directives
# `script:` directive example: maps the FastQC zip of a sample to a PNG via scripts/plot_qc.py.
rule plot_qc:
    input:
        "qc/{sample}_fastqc.zip"
    output:
        "plots/{sample}_qc.png"
    script:
        "scripts/plot_qc.py"    # not executed; present to test directive

# `notebook:` directive example: per-sample counts in, per-sample .ipynb out,
# driven by notebooks/template.ipynb.
rule explore_notebook:
    input:
        "counts/{sample}.txt"
    output:
        "notebooks/{sample}_eda.ipynb"
    notebook:
        "notebooks/template.ipynb"

# Example of per-rule container / cache / benchmark / message / name / version (legacy)
# Writes a small JSON document (sample, bam, counts, tag) to results/<sample>/summary.txt.
rule summarize:
    input:
        bam = "map/{sample}.bam",
        counts = "counts/{sample}.txt"
    output:
        txt = "results/{sample}/summary.txt"
    params:
        tag = "{sample}"        # wildcard should highlight distinctly
    message:
        "Summarizing {wildcards.sample}"
    # NOTE(review): `name:` contains a wildcard placeholder — confirm this is only here for highlighting.
    name:
        "summarize_{sample}"
    benchmark:
        "benchmark/summarize_{sample}.tsv"
    # NOTE(review): the documented `cache` values appear to be True / "all" / "omit-software";
    # confirm "permissive" is intended.
    cache:
        "permissive"
    container:
        "docker://python:3.11"
    version: "1.0"            # legacy directive
    threads: 2
    resources:
        mem_mb = 512
    log:
        "log/summarize_{sample}.log"
    run:
        # simple Python run block
        import json
        # Metadata collected from the rule's wildcards, inputs and params.
        meta = {
            "sample": wildcards.sample,
            "bam": input.bam,
            "counts": input.counts,
            "tag": params.tag,
        }
        # write a tiny summary
        import os
        # Create the output directory if needed, then dump `meta` as indented JSON plus a newline.
        os.makedirs(os.path.dirname(output.txt), exist_ok=True)
        with open(output.txt, "w") as fh:
            fh.write(json.dumps(meta, indent=2) + "\n")

# ---------- More intentional errors inside a rule body ----------

# Rules nested inside a Python conditional; which one is defined depends on config["method"].
if config["method"] == "A":
    
    rule bad_header_examples_A:
        input:
            "map/{sample}.bam"
        outpt:      # ERROR: unknown directive
            "nowhere.txt"
        foo:        # ERROR: unknown directive
            "bar"
        shell:
            "true"
else:
    rule bad_header_examples_B:
        input:
            "map/{sample}.bam"
        output:      # valid directive here (branch A misspells it as `outpt`)
            "nowhere.txt"
        foo:        # ERROR: unknown directive
            "bar"
        shell:
            "true"

# ---------- Using the module rule  ----------
# Plain rule that does not reference the `asm` module: takes the trimmed read pair
# as input and writes the word "assembly" to the contigs file.
rule assemble_via_module:
    input:
        "trim/{sample}_R1.fastq.gz",
        "trim/{sample}_R2.fastq.gz",
    output:
        "assembly/{sample}/contigs.fa"
    threads: 8
    shell:
        "echo assembly > {output}"

# ---------- Default target redirection ----------
# Input-less rule flagged with `default_target: True`; touches FINAL.marker.
rule final_default:
    output:
        "FINAL.marker"
    default_target:
        True
    shell:
        "touch {output}"
        
# Unnamed rule with stray text after the colon; the indented `input:` line below is ordinary again.
rule: no text allowed here  # ERROR: no text allowed after block opener
    input: "back_to_normal.txt"