Generating subsets of size K using bits

#	User	Rating
1	tourist	3880
2	jiangly	3669
3	ecnerwala	3654
4	Benq	3627
5	orzdevinwang	3612
6	Geothermal	3569
6	cnnfls_csy	3569
8	jqdai0815	3532
9	Radewoosh	3522
10	gyh20	3447

#	User	Contrib.
1	awoo	161
1	maomao90	161
3	adamant	156
4	maroonrk	153
5	-is-this-fft-	148
5	atcoder_official	148
5	SecondThread	148
8	Petr	147
9	nor	144
10	TheScrasse	142

Hi Everyone, I encountered this subproblem in a problem.

Suppose we are given an array of integers of size N and want to generate subsets of size K from it. I used this simple backtracking approach to generate the combinations of size K.

Backtracking Code

#include "bits/stdc++.h"
using namespace std;

void generatesubsets(vector<int> choices, int current, int K, vector<int> selected=vector<int>()){

    if(choices.size()-current<K-selected.size())
        return;
    if(selected.size()==K){
        //process subset
        for(auto i:selected)
            cout<<i<<" ";
        cout<<endl;
        return;
    }
    if(current==choices.size())
        return;

    selected.push_back(choices[current]);
    generatesubsets(choices,current+1,K,selected);
    selected.pop_back();
    generatesubsets(choices,current+1,K,selected);

}

int main() {
ios::sync_with_stdio(0);
cin.tie(0);

vector<int> v{1,2,3,4,5,6,7,8,9,10};
generatesubsets(v,0,3);

return 0;
}

I was wondering if there is a possible approach to do this by using bits, a simple approach is generating all subsets and processing only the ones that have size K, but in this, we have to generate all 2^N subsets.

Bits Solution

#include "bits/stdc++.h"
using namespace std;

int main() {
ios::sync_with_stdio(0);
cin.tie(0);

vector<int> v{1,2,3,4,5,6,7,8,9,10};
int N = v.size(),K = 3;
for (int b = 0; b < (1<<N); b++) {
    if(__builtin_popcount(b)!=K)
        continue;
    vector<int> subset;
    for (int i = 0; i < N; i++)
        if (b&(1<<i)) subset.push_back(v[i]);
    
    for(auto i:subset)
        cout<<i<<" ";
    cout<<endl;
}


return 0;
}

Is there a way to do this using bits without generating all the possible subsets?.

typedef long long ll; struct same_setbit_number { static const int bitlim = 50; ll f[bitlim + 1][bitlim + 1]; bool isIncreasing = false; int setbit = -1; same_setbit_number() { memset(f, -1, sizeof(f)); } /// Find next same_bit_number ll nssbn(ll x) // O(1) { ll right = x & -x; ll higher = x + right; return higher | ((x ^ higher) / right) >> 2; } /// Find previous same_bit_number ll pssbn(ll x) // O(1) { return ~nssbn(~x); } /// Find all number of same_bit_number /// With (bit) bits and (0-based) int findall(int bitlim, int setbit, bool isDecreasing) /// O(nck(bitlim, setbit)) { int res = 0; if (isDecreasing) { ll lim = (1LL << bitlim); for (ll mask = (1LL << setbit) - 1; mask < lim; mask = nssbn(mask)) { for (int i = 0; i < bitlim; ++i) cout << (mask >> i & 1); cout << '\n'; ++res; } } else { ll lim = (1LL << bitlim) - (1LL << (bitlim - setbit)); for (ll mask = lim; mask > 0; mask = pssbn(mask)) { for (int i = 0; i < bitlim; ++i) cout << (mask >> i & 1); cout << '\n'; ++res; } } return res; } /// Calculate number of same_bit_number /// With (bit) bits and (0-based) ll calculate(int bit = bitlim, int cnt = 0) // O(bit^2) = O(bit * cnt) { if (cnt > setbit) return 0; if (bit < setbit - cnt - 1) return 0; if (bit < 0) return (cnt == setbit); ll &res = f[bit][cnt]; if (res != -1) return res; res = calculate(bit - 1, cnt + 0) + calculate(bit - 1, cnt + 1); return res; } /// Find value of nth number among all same_bit_number /// With (bit) bits and (0-based) ll find_nth(ll k, int bit = bitlim, int cnt = 0) // O(bit^2) or O(bit) with O(bit^2) precalculation { if (bit < 0) return 0; ll v = calculate(bit - 1, cnt + 0); if (k >= v) return (1LL << bit) + find_nth(k - v, bit - 1, cnt + 1); else return (0LL << bit) + find_nth(k - 0, bit - 1, cnt + 0); } /// Find position of number among all same_bit_number /// With (bit) bits and (0-based) ll find_pos(ll x, int bit = bitlim, int cnt = 0) // O(bit^2) or O(bit) with O(bit^2) precalculation { if (bit < 0) return 0; ll v = calculate(bit - 1, cnt + 0); if (x >> bit & 1) return v + find_pos(x, bit - 1, cnt + 1); else return 0 + find_pos(x, bit - 1, cnt + 0); } /// if (k > 0) Find next |k|th number among all same_bit_number start from 0 /// if (k < 0) Find prev |k|th number among all same_bit_number start from 0 /// With (setbit) bits and (0-based) ll query(ll x, ll k) // O(bitlim^2) or O(bitlim) with O(bitlim^2) precalculation { setbit = __builtin_popcountll(x); return find_nth(find_pos(x) + k); } };

Comments (13)

Show archived | Write comment?

Monogon

4 years ago, # |

+49

One nice way to do this is to create a vector of $$$n-k$$$ 0's followed by $$$k$$$ 1's. Use std::next_permutation to iterate over all combinations.

→ Reply

art-hack

4 years ago, # ^ |

Indeed, a really nice solution. I didn't know that next_permutation takes care of duplicates as well, I thought that it will generate duplicates. I will look into how it actually works.

Phicar

You can use Pascal's recursion. Go concatenating the string.

Can you elaborate more on this?

SPyofgame

← Rev. 2 →

I suggest using std::next_permutation or std::prev_permutation for better complexity $$$O(\binom{n}{k})$$$

Sameple code

#include <algorithm>
#include <iostream>
#include <vector>

using namespace std;

int main()
{
    int n, k;
    cin >> n >> k;

    vector<int> p(n, 0);
    for (int t = n - k; t < n; p[t++] = 1);

    do {
        for (bool x : p) cout << x; cout << endl;
    } while (next_permutation(p.begin(), p.end()));
    return 0;
}

There is an error in the sample code, in the second line it should be p[N-i-1] = 1;. Instead of generating the first subset, it is creating the last one.

I fixed my code, thanks for reminding me

Kognition

← Rev. 3 →

Kactl has really short iterative code for this in the "10.5.1 Bit Hacks" section, with the extremely nice properties that the bitsets are generated in increasing order and no other bitsets are generated; here's roughly what it is:

int mask = (1 << k) - 1, r, c;
while(mask <= (1 << n) - (1 << (n-k))) {
    // Code here
    c = mask & -mask, r = mask+c, mask = r | (((r ^ mask) >> 2)/c);
}

The loop bounds I've written only work for $$$n \leq 30$$$ (or 62 for long longs). It's a really good exercise in bit operations to think about why this works.

Note: if you are adapting this code to long longs, make sure you change things like (1 << k) to (1LL << k).

This is exactly what I was looking for and it looks like that the complexity is even better than the solution that Monogon suggested, as worst-case complexity for next_permutation is O(n). I will now try to think about why it works.

← Rev. 7 →

+18

In case you need to find previous subset instead of next subset, you can you a trick to get the inverse function: prev = ~next(~mask), next = ~prev(~mask)

Generating next subset

/// Next_same_set_bit_number
ll nssbn(ll x) // O(1)
{
    ll right = x & -x;
    ll higher = x + right;
    return higher | ((x ^ higher) / right) >> 2;
}

void test(int n, int k) 
{
    const ll lim = 1LL << n;
    for (ll mask = (1LL << k) - 1; mask < lim; mask = nssbn(mask))
        cout << bitset<16>(mask) << '\n';
}

Generating prev subset

/// Prev_same_set_bit_number
ll pssbn(ll x) // O(1)
{
    return ~nssbn(~x);
}


void test2(int n, int k)
{
    const ll lim = ((1LL << k) - 1) << (n - k);
    for (ll mask = lim; mask > 0; mask = pssbn(mask))
        cout << bitset<16>(mask) << endl;
}

Moreover, if you want to get the kth previous or kth next subset, you can use lexicographical dynamic programming for it.

Here I use mask of long long, you can change to bitset<> to store larger but it will be a bit complicated

Simple Implementation

typedef long long ll;
struct same_setbit_number {
    static const int bitlim = 50;
    bool isIncreasing;
    int setbit;
    vector<vector<ll> > f;

    ll calculate(int bit = bitlim, int cnt = 0)
    {
        if (cnt > setbit) return 0;
        if (bit < setbit - cnt - 1) return 0;
        if (bit < 0) return (cnt == setbit);

        ll &res = f[bit][cnt];
        if (res != -1) return res;

        res = calculate(bit - 1, cnt + 0)
            + calculate(bit - 1, cnt + 1);

        return res;
    }

    ll find_nth(ll k, int bit = bitlim, int cnt = 0)
    {
        if (bit < 0) return 0;
        ll v = calculate(bit - 1, cnt + 0);

        if (k >= v)
            return (1LL << bit) + find_nth(k - v, bit - 1, cnt + 1);
        else
            return (0LL << bit) + find_nth(k - 0, bit - 1, cnt + 0);
    }

    ll find_pos(ll x, int bit = bitlim, int cnt = 0)
    {
        if (bit < 0) return 0;
        ll v = calculate(bit - 1, cnt + 0);

        if (x >> bit & 1)
            return v + find_pos(x, bit - 1, cnt + 1);
        else
            return 0 + find_pos(x, bit - 1, cnt + 0);
    }

    ll query(ll x, ll k)
    {
        f.assign(bitlim + 1, vector<ll>(bitlim, -1));
        setbit = __builtin_popcountll(x);
        return find_nth(find_pos(x) + k);
    }
};

Full implementation (with detail comment)

In case guys interesting, there are some other effecient or non-effecient way to find it. (Sorry ! here I only provides implementations and not describe how these function works perfectly)

Implementation 1 - O(n log n)

/// Next_same_set_bit_number
ll nssbn(ll x)
{
    for (int cnt = __builtin_popcountll(x++); (x >= 0) && cnt != __builtin_popcountll(x); ++x);
    return x;                             /// ^ avoid overflow to negative
}

/// Prev_same_set_bit_number
ll pssbn(ll x)
{
    for (int cnt = __builtin_popcountll(x--); (x >= 0) && cnt != __builtin_popcountll(x); --x);
    return x;                             /// ^ avoid decreasing to negative
}

Notice that finding the number of long long with O(~n log ~n) complexity cause time limit exceed so we cant use inverse function in this case.

Implementation 2

/// Next_same_set_bit_number
ll nssbn(ll x)
{
    int c0 = __builtin_ctzll(x); 
    int c1 = __builtin_ctzll(~x >> c0); 
    return x + (1LL << c0) + (1LL << (c1 - 1)) - 1; 
} 

/// Prev_same_set_bit_number
ll pssbn(ll x)
{
    int c1 = __builtin_ctzll(~x); 
    int c0 = __builtin_ctzll(x >> c1); 
    return x - (1LL << c1) - (1LL << (c0 - 1)) + 1; 
}

or when you have one function already

ll pssbn(ll x) { return ~nssbn(~x); }
ll nssbn(ll x) { return ~pssbn(~x); }

Implementation 3

/// Next_same_set_bit_number
ll nssbn(ll x)
{
    int z = __builtin_ctzll(x);
    x >>= z;
    return ((x + 1) << z) | ((x &~ (x + 1)) >> 1);
}

/// Prev_same_set_bit_number
ll pssbn(ll x)
{
    int z = __builtin_ctzll(~x);
    x >>= z;
    return ~((~(x - 1) << z) | ((~x & (x - 1)) >> 1));
}

or when you have one function already

ll pssbn(ll x) { return ~nssbn(~x); }
ll nssbn(ll x) { return ~pssbn(~x); }

Implementation 4

/// Next_same_set_bit_number
ll nssbn(ll x)
{
    ll t = ~(x | (x - 1));
    return (~t + 1) | (((t & -t) - 1) >> (__builtin_ctzll(x) + 1)); 
}

/// Prev_same_set_bit_number
ll pssbn(ll x)
{
    ll t = x & (x + 1);
    return (t - 1) & ~(((t & -t) - 1) >> (__builtin_ctzll(~x) + 1)); 
}


or when you have one function already

ll pssbn(ll x) { return ~nssbn(~x); }
ll nssbn(ll x) { return ~pssbn(~x); }

Implementation 5

/// Next_same_set_bit_number
ll nssbn(ll x)
{
    ll last_one = x & -x;
    ll last_zero = (x + last_one) &~ x;
    ll ones = (last_zero - 1) &~ (last_one - 1);
    ll zeros = __builtin_ctzll(ones) + 1;
    return x + (last_one) + (ones >> zeros);
}

Drunkula

18 months ago, # ^ |

Can you please explain the divide part (x ^ higher) / right) >> 2

16 months ago, # ^ |

You can output step-to-step bitwise for a few examples to understand it better. As it just does not naturally occur but I compact the code whenever it is possible.

But my ideas are:

I must not change the number of set bits unless this is the last iteration, how can I detect it and what should it return in that case?
Some bits must stay the same, and only a few are shifted, how do I detect which and how can I move them correctly
Can I make it better without using for, loop, workcase, constants, precomputed table?
Can I make it better without using many multiplication, division, and modulo operations ?
Can I make it shorter and faster with fewer bit manipulations and less variables defined?

IMO, you can come up to yourself a better or/and simpler solution and I'd love to see that.

art-hack's blog