/*
 * 3signal.v
 *
 * vim: ts=4 sw=4
 *
 * Copyright (C) 2025 Krzysztof Skrzynecki, Jakub Duchniewicz <j.duchniewicz@gmail.com>
 * SPDX-License-Identifier: TODO:
 */

`default_nettype none

// Pipelined equality comparator.
//
// The operands are split into four equal chunks; each chunk is compared and
// registered (stage 1), then the four partial results are AND-ed and
// registered (stage 2). Splitting keeps each combinational cone shallow.
//
// Parameters:
//   WIDTH  - operand width in bits (any value >= 1).
// Ports:
//   clk    - clock; all registers update on the rising edge.
//   A, B   - operands, sampled every cycle.
//   was_eq - 1 if A and B were equal 2 ticks ago.
//
// No reset: was_eq is undefined until two clock edges have passed.
module compare_eq#(
    parameter WIDTH = 16
)(
    input wire clk,

    input wire [WIDTH-1:0] A,
    input wire [WIDTH-1:0] B,

    output reg [0:0] was_eq //1 if A and B were equal 2 ticks ago
);
    // Chunk width, rounded up so that four chunks always cover WIDTH bits.
    localparam CHUNK  = (WIDTH+3)/4;
    localparam PADDED = 4*CHUNK;

    // Zero-extend both operands to a multiple of four bits. Padding both
    // sides with the same constant does not change the equality result and
    // keeps every slice width positive for any WIDTH (the padding bits are
    // constants and are optimised away by synthesis).
    wire [PADDED-1:0] A_pad = A;
    wire [PADDED-1:0] B_pad = B;

    // Stage-1 registers: per-chunk equality.
    reg [3:0] chunk_eq;

    always @(posedge clk) begin
        chunk_eq[0] <= (A_pad[0*CHUNK +: CHUNK] == B_pad[0*CHUNK +: CHUNK]);
        chunk_eq[1] <= (A_pad[1*CHUNK +: CHUNK] == B_pad[1*CHUNK +: CHUNK]);
        chunk_eq[2] <= (A_pad[2*CHUNK +: CHUNK] == B_pad[2*CHUNK +: CHUNK]);
        chunk_eq[3] <= (A_pad[3*CHUNK +: CHUNK] == B_pad[3*CHUNK +: CHUNK]);

        // Stage 2: all chunks equal.
        was_eq <= &chunk_eq;
    end
endmodule

// Two-stage pipelined adder.
//
// Stage 1 adds the low and high halves of the operands independently and
// registers both partial sums (the low sum keeps its carry-out bit).
// Stage 2 adds the registered low-half carry into the high partial sum and
// registers the full result.
//
// Parameters:
//   WIDTH - operand/result width in bits (must be >= 2 so both halves exist).
// Ports:
//   clk   - clock; all registers update on the rising edge.
//   A, B  - operands, sampled every cycle.
//   sum   - (A + B) modulo 2**WIDTH, for the operands sampled 2 ticks ago.
module add#(
    parameter WIDTH = 16
)(
    input wire clk,

    input wire [WIDTH-1:0] A,
    input wire [WIDTH-1:0] B,

    output reg [WIDTH-1:0] sum
);
    localparam WIDTH_DIV2 = (WIDTH+1)/2;        // low-half width (rounded up)
    localparam WIDTH_REM  = WIDTH - WIDTH_DIV2; // high-half width

    wire [WIDTH_DIV2-1:0] A0_pt0;
    wire [WIDTH_REM-1:0] A0_pt1;

    wire [WIDTH_DIV2-1:0] B0_pt0;
    wire [WIDTH_REM-1:0] B0_pt1;

    assign {A0_pt1, A0_pt0} = A;
    assign {B0_pt1, B0_pt0} = B;

    reg [WIDTH_DIV2:0] C0_pt0;   // low partial sum; bit WIDTH_DIV2 is the carry-out
    reg [WIDTH_REM-1:0] C0_pt1;  // high partial sum, carry-in not yet applied

    // High half with the registered low-half carry folded in. The result is
    // explicitly WIDTH_REM bits wide, so no hard-coded zero padding is needed
    // and the widths stay consistent for every WIDTH.
    wire [WIDTH_REM-1:0] hi_sum = C0_pt1 + C0_pt0[WIDTH_DIV2];

    always @(posedge clk) begin
        C0_pt0 <= {1'b0, A0_pt0} + {1'b0, B0_pt0};
        C0_pt1 <= A0_pt1 + B0_pt1;

        sum <= {hi_sum, C0_pt0[WIDTH_DIV2-1:0]};
    end
endmodule


// Pipelined unsigned greater-than comparator.
//
// Pipeline stages (one register level each):
//   1. Ab/Bb   - input buffer registers (shorter wires to the comparators).
//   2. pt*_gt / pt*_eq - per-chunk ">" and "==" over four equal chunks.
//   3. pt01_gt, pt23_gt, pt23_eq - chunks merged pairwise.
//   4. was_gt  - final merge.
//
// Parameters:
//   WIDTH  - operand width in bits (any value >= 1).
// Ports:
//   clk    - clock; all registers update on the rising edge.
//   A, B   - unsigned operands, sampled every cycle.
//   was_gt - 1 if A was greater than B 4 ticks ago.
//
// No reset: was_gt is undefined until four clock edges have passed.
module compare_gt#(
    parameter WIDTH = 16
)(
    input wire clk,

    input wire [WIDTH-1:0] A,
    input wire [WIDTH-1:0] B,

    output reg [0:0] was_gt //1 if A was greater than B 4 ticks ago
);
    // Chunk width, rounded up so that four chunks always cover WIDTH bits.
    localparam CHUNK  = (WIDTH+3)/4;
    localparam PADDED = 4*CHUNK;

    // Declared before first use (required with `default_nettype none).
    reg [WIDTH-1:0] Ab, Bb; // buffer regs for shorter wires

    // Zero-extend on the MSB side: this preserves the unsigned ordering and
    // keeps every slice width positive for any WIDTH.
    wire [PADDED-1:0] A_pad = Ab;
    wire [PADDED-1:0] B_pad = Bb;

    wire [CHUNK-1:0] A0_pt0 = A_pad[0*CHUNK +: CHUNK];
    wire [CHUNK-1:0] A0_pt1 = A_pad[1*CHUNK +: CHUNK];
    wire [CHUNK-1:0] A0_pt2 = A_pad[2*CHUNK +: CHUNK];
    wire [CHUNK-1:0] A0_pt3 = A_pad[3*CHUNK +: CHUNK];

    wire [CHUNK-1:0] B0_pt0 = B_pad[0*CHUNK +: CHUNK];
    wire [CHUNK-1:0] B0_pt1 = B_pad[1*CHUNK +: CHUNK];
    wire [CHUNK-1:0] B0_pt2 = B_pad[2*CHUNK +: CHUNK];
    wire [CHUNK-1:0] B0_pt3 = B_pad[3*CHUNK +: CHUNK];

    reg pt0_gt, pt1_gt, pt2_gt, pt3_gt;
    reg pt1_eq, pt2_eq, pt3_eq;   // equality of chunk 0 is never needed

    reg pt23_eq;
    reg pt01_gt, pt23_gt;

    always @(posedge clk) begin
        // Stage 1: buffer the inputs.
        Ab <= A;
        Bb <= B;

        // Stage 2: per-chunk comparisons.
        pt0_gt <= A0_pt0>B0_pt0;
        pt1_gt <= A0_pt1>B0_pt1;
        pt2_gt <= A0_pt2>B0_pt2;
        pt3_gt <= A0_pt3>B0_pt3;

        pt1_eq <= A0_pt1==B0_pt1;
        pt2_eq <= A0_pt2==B0_pt2;
        pt3_eq <= A0_pt3==B0_pt3;

        // Stage 3: a pair is greater when its upper chunk is greater, or the
        // upper chunks tie and the lower chunk is greater.
        pt01_gt <= pt1_gt | (pt1_eq & pt0_gt);

        pt23_eq <= pt2_eq & pt3_eq;
        pt23_gt <= pt3_gt | (pt3_eq & pt2_gt);

        // Stage 4: same rule applied to the two pairs.
        was_gt <= pt23_gt | (pt23_eq & pt01_gt);
    end

endmodule

// Free-running down-counter that emits trigger pulses at fixed counter values.
//
// The counter is reloaded from `period_shadow` on the tick after
// `start_cycle` is asserted and decrements otherwise. A one-tick-delayed
// copy (`counter_delayed`) feeds five pipelined equality comparators
// (compare_eq, 2 ticks of latency each):
//   zero        - counter_delayed == 0; registered once more into start_cycle
//                 and used to latch `period` and `period >> 1`.
//   half_cycle  - counter_delayed == period_div2.
//   trig_out[n] - counter_delayed == TRIGn_COUNT.
//
// Parameters:
//   WIDTH - counter / period width in bits.
// Ports:
//   clk         - clock.
//   period      - reload value; sampled only while `zero` is asserted.
//   start_cycle - `zero` delayed by one tick.
//   half_cycle, trig_out - driven directly by the comparator instances, so
//                 they must be nets (a Verilog `reg` cannot be driven by an
//                 instance output port).
//
// NOTE(review): the reset input is commented out, so `counter` has no defined
// start value in simulation; on hardware this relies on register power-up
// state. Confirm this is intended.
module cycle_trigger#(
    parameter WIDTH = 16
) (
    input wire /*nrst,*/ clk,
    input wire [WIDTH-1:0] period,

    output reg  [0:0] start_cycle,
    output wire [0:0] half_cycle,
    output wire [2:0] trig_out
);
    // Counter values at which the comparators fire, sized to the port width
    // so the instance connections have no width mismatch.
    localparam [WIDTH-1:0] COUNT_ZERO  = 0;
    // NOTE(review): trig_out[0] and trig_out[1] use the same count (3) -
    // confirm that this is intended.
    localparam [WIDTH-1:0] TRIG0_COUNT = 3;
    localparam [WIDTH-1:0] TRIG1_COUNT = 3;
    localparam [WIDTH-1:0] TRIG2_COUNT = 9;

    reg [WIDTH-1:0] counter;
    reg [WIDTH-1:0] counter_delayed;// just to make placement easier - less branches from counter
    reg [WIDTH-1:0] period_div2;
    reg [WIDTH-1:0] period_shadow;

    wire zero;

    always @(posedge clk /*or negedge nrst*/) begin
        /*if (!nrst) begin
            counter <= 0;
            //period_div2 <= 16'h0;
            high_match_a <= 0;
            high_match_b <= 0;
            half_match_a <= 0;
            half_match_b <= 0;

            start_cycle <= 0;
            half_cycle  <= 0;
            trig_out    <= 0;
        end else begin*/
            // Latch the period (and its half) while the zero comparator fires.
            if(zero) begin
                start_cycle <= 1;
                period_div2 <= period>>1;
                period_shadow <= period;
            end else begin
                start_cycle <= 0;
            end

            // Reload one tick after the period was latched, else count down.
            if (start_cycle) begin
                counter <= period_shadow;
            end else begin
                counter <= counter-1;
            end

            counter_delayed <= counter;

        //end //nrst
    end

    compare_eq#(.WIDTH(WIDTH))cmp_zero(
        .clk(clk),
        .A(counter_delayed),
        .B(COUNT_ZERO),
        .was_eq(zero)
    );

    compare_eq#(.WIDTH(WIDTH))cmp_half(
        .clk(clk),
        .A(counter_delayed),
        .B(period_div2),
        .was_eq(half_cycle)
    );

    compare_eq#(.WIDTH(WIDTH))cmp0(
        .clk(clk),
        .A(counter_delayed),
        .B(TRIG0_COUNT),
        .was_eq(trig_out[0])
    );

    compare_eq#(.WIDTH(WIDTH))cmp1(
        .clk(clk),
        .A(counter_delayed),
        .B(TRIG1_COUNT),
        .was_eq(trig_out[1])
    );

    compare_eq#(.WIDTH(WIDTH))cmp2(
        .clk(clk),
        .A(counter_delayed),
        .B(TRIG2_COUNT),
        .was_eq(trig_out[2])
    );

endmodule

/*module continous_pwm_gen#(
    parameter WIDTH = 16
) (
    input wire nrst, clk,
    input wire [WIDTH-1:0] period,
    input wire [WIDTH-1:0] delay,

    output reg [0:0] pwm_out
);
    reg [WIDTH-1:0] counter;

    reg [WIDTH-1:0] _period;
    reg [WIDTH-1:0] _delay;

    reg [0:0] next_out;


    reg [0:0] is_last_tick;
    reg [0:0] is_last_tickA;
    reg [0:0] is_last_tickB;
    reg [0:0] is_mid_tick;

    // Counter logic
    always @(posedge clk or negedge nrst) begin
        if (!nrst) begin
            counter <= 0;
            pwm_out <= 0;


            next_out <= 0;

            _period <= 0;
            _delay <= 0;

            is_last_tick <= 0;
            is_last_tickA <= 0;
            is_last_tickB <= 0;
        end else begin
            _period <= period;
            _delay <= delay;

            is_last_tickA <= counter[WIDTH-1:8]==0;
            is_last_tickB <= counter[7:0]==2;
            is_last_tick <= is_last_tickA&is_last_tickB;

            if (is_last_tick) begin
                counter <= _period;
            end else begin
                counter <= counter-1;
            end

            next_out <= is_last_tick;

            pwm_out <= next_out;
        end
    end

endmodule*/

// Triggered single-pulse generator.
//
// On `trigger` the counter is loaded with `period`; otherwise it decrements
// every tick. The output is the pipelined comparison `delay > counter`,
// i.e. it goes high once the counter has dropped below `delay`.
//
// Parameters:
//   WIDTH - counter / delay / period width in bits.
// Ports:
//   nrst    - currently unused (kept for interface compatibility).
//   clk     - clock.
//   trigger - loads the counter with `period` while high.
//   delay   - comparison threshold.
//   period  - counter start value.
//   pwm_out - registered comparator result; lags the counter by the
//             counter2 buffer, the compare_gt pipeline and one output
//             register.
module single_shot_gen#(
    parameter WIDTH = 16
)(
    input wire nrst,clk,
    input wire trigger,
    input wire [WIDTH-1:0] delay,
    input wire [WIDTH-1:0] period,
    output reg pwm_out
);
    reg [WIDTH-1:0] counter;
    reg [WIDTH-1:0] counter2; // Buffer register for better timing
    wire pwm_gt;

    always @(posedge clk) begin
        if (trigger) begin
            counter <= period; //########TODO WARNING: counter will underflow which may trigger unwanted pulses. Add logic to fix that.
        end else begin
            counter <= counter - 1;
        end

        // Buffer register to reduce timing pressure
        counter2 <= counter;

        // Output register to reduce delay
        pwm_out <= pwm_gt;
    end

    // Parallel comparison with buffered counter
    compare_gt #(.WIDTH(WIDTH)) cmp_out (
        .clk(clk),
        .A(delay),
        .B(counter2),
        .was_gt(pwm_gt)
    );

endmodule

/*module phase_delay#(
    parameter WIDTH = 16,
    parameter FORWARD_DATA_WIDTH = 16
)(
    input wire nrst, clk,
    input wire in_trig,
    input [WIDTH-1:0] delay,
    input [FORWARD_DATA_WIDTH-1:0] in_data_fwd,

    output wire out_trig,
    output reg [FORWARD_DATA_WIDTH-1:0] out_data_fwd
);
    reg [WIDTH-1:0] counter;
    reg [0:0] is_counting;

    reg [WIDTH-1:0] delay_latched;

    always @(posedge clk or negedge nrst) begin
        if (!nrst) begin
            counter <= 0;
            is_counting <=0;
            delay_latched <=0;
        end else begin
            if (is_counting) begin
                if ((counter+1) >= delay_latched) begin
                    counter <= 0;
                    is_counting <= 0;
                end else begin
                    counter <= counter+1;
                end
            end else begin
                if (in_trig) begin
                    is_counting <= 1;
                    out_data_fwd <= in_data_fwd;
                    delay_latched <= delay;
                end
            end

        end
    end

    assign out_trig=((counter+1)>=delay) && is_counting;
endmodule*/

// Up-counter split into two halves so that no carry chain spans the full
// width in a single cycle.
//
// The low half (cnt0) increments whenever `if_counting` is high. A registered
// flag `cnt0f` is kept equal to "cnt0 is all ones" by looking one step ahead
// (it is set when cnt0 is about to become all ones), so the high half (cnt1)
// increments on exactly the tick where cnt0 wraps to zero. The concatenated
// value therefore steps ..., 2**N-1, 2**N, 2**N+1, ... without a glitch, and
// no carry is lost when counting is paused.
//
// Parameters:
//   WIDTH - total counter width in bits (must be >= 2 so both halves exist).
// Ports:
//   clk         - clock.
//   trigger     - synchronous clear of both halves; cnt_out is NOT updated
//                 while trigger is high (it holds its previous value).
//   if_counting - count enable.
//   cnt_out     - registered copy of {cnt1, cnt0}, one tick behind the
//                 internal count.
module simple_counter#(
    parameter WIDTH = 14
)(
    input wire clk,
    input wire trigger,
    input wire if_counting,

    output reg [WIDTH-1:0] cnt_out
);
    localparam HALF_WIDTH = (WIDTH+1)/2;        // low-half width (7 for WIDTH=14)
    localparam HIGH_WIDTH = WIDTH - HALF_WIDTH; // high-half width (7 for WIDTH=14)

    // Low-half value one step before it is all ones.
    localparam [HALF_WIDTH-1:0] CNT0_PRE_MAX = {HALF_WIDTH{1'b1}} - 1'b1;

    reg [HIGH_WIDTH-1:0] cnt1;
    reg [HALF_WIDTH-1:0] cnt0;

    reg cnt0f; // invariant after a trigger: cnt0f == &cnt0

    always @(posedge clk /*or negedge nrst*/) begin
        if (trigger) begin
            cnt1 <= 0;
            cnt0 <= 0;

            cnt0f <= 0;
        end else begin
            if(if_counting) begin
                cnt0 <= cnt0+1'b1;

                // Look-ahead: after this edge cnt0 is all ones exactly when
                // it is currently one below that. When not counting, cnt0 and
                // cnt0f both hold, so the invariant is preserved.
                cnt0f <= (cnt0 == CNT0_PRE_MAX);

                // cnt0 is all ones right now, so it wraps on this edge.
                if(cnt0f) begin
                    cnt1 <= cnt1+1'b1;
                end
            end

            cnt_out <= {cnt1, cnt0};
        end
    end
endmodule

// Pulse-train generator (work in progress).
//
// A simple_counter instance counts ticks while `is_counting2` is high and is
// cleared by `trigger`. Its value is compared (compare_eq) against a cycle
// threshold; on a match while counting, the remaining-pulse counter and the
// threshold advance to their "next" values. `pwm_out` currently just mirrors
// the registered comparator match.
//
// Parameters:
//   TOTAL_PERIOD_WIDTH  - width of the tick counter and thresholds.
//   SINGLE_CYCLE_WIDTH  - width of `period` / `duty`.
//   PULSE_COUNTER_WIDTH - width of `npuls` and the pulse counter.
// Ports:
//   nrst    - currently unused (reset logic is disabled).
//   clk     - clock.
//   trigger - loads cycle_counter with `npuls` and the threshold with the
//             registered period; also clears the tick counter.
//   npuls   - number of pulses to load on trigger.
//   period  - single-cycle period (registered internally as _period).
//   duty    - currently unused.
//   pwm_out - registered copy of the threshold-match flag.
//
// NOTE(review): `next_cycle_threshold` is never assigned because the `add`
// instance at the bottom is commented out, so `cycle_threshold` becomes
// undefined after the first threshold match. Confirm how this is meant to be
// completed before relying on more than one cycle.
module pulse_train_gen#(
    parameter TOTAL_PERIOD_WIDTH = 14,
    parameter SINGLE_CYCLE_WIDTH = 8,
    parameter PULSE_COUNTER_WIDTH = 8
)(
    input wire nrst, clk,
    input wire trigger,
    input wire [PULSE_COUNTER_WIDTH-1:0] npuls,
    input wire [SINGLE_CYCLE_WIDTH-1:0] period,
    input wire [SINGLE_CYCLE_WIDTH-1:0] duty,

    output reg [0:0] pwm_out
);
    // Driven by the simple_counter instance, so it must be a net.
    wire [TOTAL_PERIOD_WIDTH-1:0] counter;//total waveform counter
    reg [TOTAL_PERIOD_WIDTH-1:0] counter2;

    reg [TOTAL_PERIOD_WIDTH-1:0] cycle_threshold;

    reg [TOTAL_PERIOD_WIDTH-1:0] cycle_threshold2;

    reg [TOTAL_PERIOD_WIDTH-1:0] next_cycle_threshold; // see NOTE(review) above: undriven

    reg [PULSE_COUNTER_WIDTH-1:0] cycle_counter;
    reg [PULSE_COUNTER_WIDTH-1:0] next_cycle_counter;

    reg [SINGLE_CYCLE_WIDTH-1:0] _period;

    // Registered period zero-extended to the threshold width (replaces a
    // hard-coded 6-bit pad that was only right for the default widths).
    wire [TOTAL_PERIOD_WIDTH-1:0] period_ext = _period;

    // Driven by the compare_eq instance, so it must be a net.
    wire if_next_cycle;

    reg is_counting;
    reg is_counting2;

    always @(posedge clk /*or negedge nrst*/) begin
        /*if (!nrst) begin
            is_counting <= 0;
        end else begin*/
            if(trigger) begin
                cycle_threshold <= period_ext;
                cycle_counter <= npuls;
            end
            counter2 <= counter;

            _period <= period;

            // Placed after the trigger branch on purpose: when both fire in
            // the same tick, these (later) assignments win.
            if(if_next_cycle & is_counting2) begin
                cycle_counter <= next_cycle_counter;
                cycle_threshold <= next_cycle_threshold;
            end
            next_cycle_counter <= cycle_counter-1;

            is_counting <= |next_cycle_counter;
            is_counting2 <= is_counting;

            pwm_out <= if_next_cycle;

            cycle_threshold2 <= cycle_threshold;
        //end
    end

    simple_counter#(.WIDTH(TOTAL_PERIOD_WIDTH)) cnt(
        .clk(clk),
        .trigger(trigger),
        .if_counting(is_counting2),
        .cnt_out(counter)
    );

    compare_eq#(.WIDTH(TOTAL_PERIOD_WIDTH))cmp_cycle(
        .clk(clk),
        .A(counter2),
        .B(cycle_threshold2),
        .was_eq(if_next_cycle)
    );

    /*add#(
        .WIDTH(TOTAL_PERIOD_WIDTH)
    )cycle_th(
        .clk(clk),
        .A(cycle_threshold2),
        .B({6'b0, _period}),
        .sum(next_cycle_threshold)
    );*/
endmodule

// 2-bit selector values for the odd-cycle pulse-train mode. In the visible
// source they are only referenced by the commented-out `trigger_odd_cycle`
// logic in three_signal, where they are compared against `odd_train_flag`:
//   FORCE_OFF   - odd-cycle trigger is suppressed,
//   ENA_CONTROL - odd-cycle trigger is gated by `ena_odd_out3`,
//   FORCE_ON    - odd-cycle trigger always passes.
// NOTE(review): these are declared outside any module, which is
// SystemVerilog compilation-unit syntax; a strict Verilog-2001 flow may
// reject it - confirm the tool settings.
parameter [1:0] ODD_TRAIN_FORCE_OFF   = 2'b00;
parameter [1:0] ODD_TRAIN_ENA_CONTROL = 2'b01;
parameter [1:0] ODD_TRAIN_FORCE_ON    = 2'b10;

// Top level: three output signals derived from one shared cycle trigger.
//
// A cycle_trigger instance (period = period1) produces three trigger pulses.
// trig_out[0] and trig_out[1] each start a single_shot_gen (Out1, Out2);
// trig_out[2] starts a pulse_train_gen (Out3). All configuration inputs are
// registered once on entry and all three outputs are registered once on exit.
//
// Parameters:
//   FAST_PWM_WIDTH      - width of period3 / duty3.
//   PULSE_COUNTER_WIDTH - width of npuls3.
//   SLOW_PWM_WIDTH      - width of the slow periods and delays.
// Ports:
//   nrst, clk           - reset (forwarded to the generators) and clock.
//   period1, delay1     - period/delay of channel 1; period1 also sets the
//                         cycle_trigger period.
//   period2, delay2     - period/delay of channel 2.
//   period3, duty3, npuls3 - pulse-train settings of channel 3.
//   delay3, odd_train_flag, ena_odd_out3 - registered but not yet used by
//                         any active logic (only by commented-out code).
//   Out1, Out2, Out3    - registered generator outputs.
module three_signal#(
    parameter FAST_PWM_WIDTH=8,
    parameter PULSE_COUNTER_WIDTH=8,
    parameter SLOW_PWM_WIDTH=14
)(
    input wire nrst, clk,

    input wire [SLOW_PWM_WIDTH-1:0] period1,

    input wire [SLOW_PWM_WIDTH-1:0] delay1,

    input wire [SLOW_PWM_WIDTH-1:0] period2,
    input wire [SLOW_PWM_WIDTH-1:0] delay2,

    input wire [FAST_PWM_WIDTH-1:0] period3,
    input wire [FAST_PWM_WIDTH-1:0] duty3,
    input wire [SLOW_PWM_WIDTH-1:0] delay3,//delay is wrt slow pwm, thus longer bit length
    input wire [PULSE_COUNTER_WIDTH-1:0] npuls3,

    input /*odd_train_flag_t*/ wire [1:0] odd_train_flag,

    input wire ena_odd_out3,

    output reg Out1,
    output reg Out2,
    output reg Out3
);
    // Input registers (one tick of latency on every configuration input).
    reg [SLOW_PWM_WIDTH-1:0] _period1;
    reg [SLOW_PWM_WIDTH-1:0] _delay1;
    reg [SLOW_PWM_WIDTH-1:0] _period2;
    reg [SLOW_PWM_WIDTH-1:0] _delay2;
    reg [FAST_PWM_WIDTH-1:0] _period3;
    reg [FAST_PWM_WIDTH-1:0] _duty3;
    reg [SLOW_PWM_WIDTH-1:0] _delay3;//delay is wrt slow pwm, thus longer bit length
    reg [PULSE_COUNTER_WIDTH-1:0] _npuls3;
    reg /*odd_train_flag_t*/ [1:0] _odd_train_flag;
    reg _ena_odd_out3;

    // Raw generator outputs, registered into Out1..Out3 below.
    wire _Out1;
    wire _Out2;
    wire _Out3;


    always @(posedge clk) begin
        _period1        <= period1;
        _delay1         <= delay1;

        _period2        <= period2;
        _delay2         <= delay2;

        _period3        <= period3;
        _duty3          <= duty3;
        _delay3         <= delay3;
        _npuls3         <= npuls3;
        _odd_train_flag <= odd_train_flag;
        _ena_odd_out3   <= ena_odd_out3;


        //TODO - output already as reg; no additional latency needed?
        Out1 <= _Out1;
        Out2 <= _Out2;
        Out3 <= _Out3;
    end

    //wire trigger_next_cycle;
    //wire trigger_even_cycle;
    //wire trigger_odd_cycle;

    // Split of delay3 into one "remainder" part and three equal quarter
    // parts. Not consumed by any active logic at the moment.
    wire [SLOW_PWM_WIDTH-1:0] delay3_part1;
    wire [SLOW_PWM_WIDTH-1:0] delay3_part234;

    assign delay3_part234 = delay3>>2;
    assign delay3_part1 = (delay3<4) ? 0 : (delay3 - (delay3_part234*3));

    wire [0:0] start_cycle;
    wire [0:0] half_cycle;
    wire [2:0] trig_out;

    // NOTE(review): this instance takes the unregistered `period1`, while the
    // generators below take the registered copies - confirm this is intended.
    cycle_trigger #(.WIDTH(SLOW_PWM_WIDTH)) cyc_trig(
        //.nrst(nrst),
        .clk(clk),

        .period(period1),

        .start_cycle(start_cycle),
        .half_cycle(half_cycle),
        .trig_out(trig_out)
    );

    single_shot_gen #(.WIDTH(SLOW_PWM_WIDTH)) pwm1(
        .nrst(nrst),
        .clk(clk),
        .trigger(trig_out[0]),
        .delay(_delay1),
        .period(_period1),
        .pwm_out(_Out1)
    );

    single_shot_gen #(.WIDTH(SLOW_PWM_WIDTH)) pwm2(
        .nrst(nrst),
        .clk(clk),
        .trigger(trig_out[1]),
        .delay(_delay2),
        .period(_period2),
        .pwm_out(_Out2)
    );

    pulse_train_gen#(
        .TOTAL_PERIOD_WIDTH(SLOW_PWM_WIDTH),
        .SINGLE_CYCLE_WIDTH(FAST_PWM_WIDTH),
        .PULSE_COUNTER_WIDTH(PULSE_COUNTER_WIDTH)
    )pwm3(
        .nrst(nrst),
        .clk(clk),
        .trigger(trig_out[2]),

        .npuls(_npuls3),
        .period(_period3),
        .duty(_duty3),

        .pwm_out(_Out3)
    );

    /*wire trigger_delay_1_to_2;
    wire trigger_delay_2_to_3;
    wire trigger_delay_3_to_4;
    wire trigger_delay_4_to_pulse_train;

    localparam FORWARD_DATA_WIDTH = SLOW_PWM_WIDTH + PULSE_COUNTER_WIDTH + FAST_PWM_WIDTH + FAST_PWM_WIDTH;
    localparam DELAY_BITS_POS = PULSE_COUNTER_WIDTH + FAST_PWM_WIDTH + FAST_PWM_WIDTH;
    wire [FORWARD_DATA_WIDTH-1:0] data_1_to_2;
    wire [FORWARD_DATA_WIDTH-1:0] data_2_to_3;
    wire [FORWARD_DATA_WIDTH-1:0] data_3_to_4;
    wire [FORWARD_DATA_WIDTH-1:0] data_4_to_gen;

    assign trigger_odd_cycle =
        (odd_train_flag == ODD_TRAIN_ENA_CONTROL) ? trigger_next_cycle && ena_odd_out3 :
        (odd_train_flag == ODD_TRAIN_FORCE_ON) ? trigger_next_cycle :
        0;

    wire [FAST_PWM_WIDTH-1:0] period3_gen;
    wire [FAST_PWM_WIDTH-1:0] duty3_gen;
    wire [PULSE_COUNTER_WIDTH-1:0] npuls3_gen;

    assign duty3_gen = data_4_to_gen[FAST_PWM_WIDTH-1:0];
    assign period3_gen = data_4_to_gen[FAST_PWM_WIDTH+FAST_PWM_WIDTH-1:FAST_PWM_WIDTH];
    assign npuls3_gen = data_4_to_gen[FAST_PWM_WIDTH+FAST_PWM_WIDTH+PULSE_COUNTER_WIDTH-1:FAST_PWM_WIDTH+FAST_PWM_WIDTH];*/
endmodule