Getting TLE'd with O(nlogn) (2-sec)

I cannot figure out why this code is getting TLE'd even though the complexity is O(n log n) and everything is within the constraints. I knew the mod operations were costly, so I optimized them, but still no luck.

Problem : https://codeforces.com/contest/1466/problem/E

Code

//Think simple yet elegant.
#include <bits/stdc++.h>
using namespace std;
// Fast iostream setup: turns off synchronisation with C stdio and clears the cin/cout ties.
#define fast ios::sync_with_stdio(0); cin.tie(0); cout.tie(0);
// Shorthand for the 64-bit signed integer type used by the arithmetic helpers and run_case().
#define ll  long long
// Expands to the begin/end iterator pair of a container (not used in this listing).
#define all(v) v.begin(),v.end()
// Pair member shorthands (not used in this listing).
#define F first
#define S second
// Shorthand for vector::push_back; run_case() uses it to collect binary digits.
#define pb push_back
// Further pair shorthands (not used in this listing).
#define mp make_pair
#define pi pair<int,int>
// Counting loop 0..n-1. Note that it declares its OWN `int i`, shadowing any outer variable of the same name.
#define REP(i,n) for(int i=0;i<n;i++)
// Not referenced anywhere in this listing.
const int N = 3e4+10;
// Modulus applied by mul() and add().
const ll mod = 1e9+7;
// Not referenced anywhere in this listing.
const int BLOCK_SIZE = 200;


// Modular product: returns a*b reduced by `mod`.
// A `%` is only executed when the value in question is actually >= mod, so
// already-small values skip the division. Values below zero never satisfy
// that test and are therefore passed through unreduced.
ll mul(ll a, ll b) {
	const ll lhs = (a >= mod) ? a % mod : a;
	const ll rhs = (b >= mod) ? b % mod : b;
	const ll product = lhs * rhs;
	return (product >= mod) ? product % mod : product;
}
// Modular sum: returns a+b, applying `% mod` only when the sum has reached mod.
// Sums below mod (including negative ones) are returned unchanged.
ll add(ll a, ll b) {
	const ll sum = a + b;
	return (sum >= mod) ? sum % mod : sum;
}

// Handles one test case: reads n and then n values x[0..n-1] from stdin and prints
//   sum over i of  O_i * A_i   (accumulated with mul()/add()),
// where, per the bit loop below, O_i collects the per-bit contributions of
// (x[i] | x[k]) over all k and A_i those of (x[i] & x[k]) over all k.
// NOTE(review): the bit handling assumes every x[i] is non-negative and below 2^60
// (C has only 60 columns) — confirm against the problem limits.
void run_case(){
	// k is declared but never used in this function.
	ll n,i,j,k;

	//<i,j> -> {<j,1>+<j,2>+<j,3>,...............,<j,n>}
	//{<1,1> + <2,1> + .... + <n,1>}-> {1,..|{1,2,...n}}
	cin >> n;
	// Variable-length arrays sized by the input (a compiler extension in C++).
	// C[i][b] stores bit b of x[i] as a full ll, i.e. 60 ll values per input number.
	ll x[n];
	ll C[n][60];
	// REP declares its own int i / int j, which shadow the ll i / j declared above.
	REP(i,n)
		REP(j,60)
			C[i][j]=0;
	// col[b] will hold the number of inputs that have bit b set.
	ll col[60];
	memset(col,0,sizeof(col));

	// Pass 1: read each number and write its binary digits into row i of C.
	for(i=0;i<n;i++){
		cin >> x[i];
		ll tt = x[i];
		// Least-significant digit first; a fresh vector is built for every number.
		vector<ll> bin;
		while(tt>0){
			bin.pb(tt%2);
			tt/=2;
		}
		for(j=0;j<bin.size();++j){
			C[i][j] = bin[j];
		}

	}
	// Pass 2: column sums of C give the per-bit population counts.
	for(i=0;i<60;i++){
		ll cnt=0;
		for(j=0;j<n;j++)
			cnt+=C[j][i];
		col[i]=cnt;
	}
	ll ans=0;
	// Pass 3: for every x[i], rebuild its binary digits and accumulate O and A bit by bit.
	for(i=0;i<n;i++){
		ll tt = x[i];
		vector<ll> bin;
		ll O=0,A=0;
		while(tt>0){
			bin.pb(tt%2);
			tt/=2;
		}
		// Pad with zeros so all 60 bit positions are visited.
		while(bin.size()<60)
			bin.pb(0);
		for(j=0;j<bin.size();++j){
			if(bin[j]==1){
				// Bit j of x[i] is set: it is set in (x[i] | x[k]) for all n values of k,
				// and set in (x[i] & x[k]) for the col[j] values of k that also have it.
				O = add(O,mul(n,(1LL<<j)));
				A = add(A,mul(col[j],(1LL<<j)));
			}
			else
				// Bit j of x[i] is clear: the OR has it only for the col[j] values that
				// carry the bit; the AND never has it.
				O = add(O,mul(col[j],(1LL<<j)));
		}
		ans = add(ans,mul(O,A));
	}
	cout<<ans<<"\n";
}
// Entry point: switches on fast I/O, reads the number of test cases and
// calls run_case() once per test case.
int main() {
	fast;
	int remaining;
	cin >> remaining;
	for (; remaining != 0; --remaining)
		run_case();
	return 0;
}

Update-1 : Resolved. It barely passed (1996 ms) after I changed some ll's to ints. I would still like to hear some advice or see a good solution, though!

Update-2 : The comments are absolute gold, thanks guys! I made the necessary changes and made the code more concise.

New Code

//Think simple yet elegant.
#include <bits/stdc++.h>
using namespace std;
// Fast iostream setup: turns off synchronisation with C stdio and clears the cin/cout ties.
#define fast ios::sync_with_stdio(0); cin.tie(0); cout.tie(0);
// Shorthand for the 64-bit signed integer type used by the arithmetic helpers and run_case().
#define ll  long long
// The remaining shorthands are template leftovers: none of them is used in this listing.
#define all(v) v.begin(),v.end()
#define F first
#define S second
#define pb push_back
#define mp make_pair
#define pi pair<int,int>
// Counting loop 0..n-1; declares its own `int i` (also unused in this listing).
#define REP(i,n) for(int i=0;i<n;i++)
// Not referenced anywhere in this listing.
const int N = 3e4+10;
// Modulus applied by mul() and add().
const ll mod = 1e9+7;
// Not referenced anywhere in this listing.
const int BLOCK_SIZE = 200;


// Modular product of a and b.
// Each factor, and then the product, goes through the same step: take `% mod`
// only if the value is >= mod, otherwise leave it untouched. Values below
// zero are therefore never reduced.
ll mul(ll a, ll b) {
	const auto shrink = [](ll value) { return value >= mod ? value % mod : value; };
	return shrink(shrink(a) * shrink(b));
}
// Returns (a + b) reduced by `mod`, for non-negative operands.
//
// Fast path: when both operands are already below mod, the sum is below
// 2*mod, so a single subtraction finishes the reduction and no `%` runs.
// Fallback: if a caller passes an operand that was not reduced beforehand,
// one subtraction can still leave a value >= mod; that case is completed
// with `%` so the result is always below mod instead of silently unreduced.
// Sums below zero are returned unchanged.
ll add(ll a,ll b){
	ll sum = a+b;
	if(sum>=mod){
		sum-=mod;
		if(sum>=mod)	// only reachable when an operand was >= mod
			sum%=mod;
	}
	return sum;
}

void run_case(){
	int n,i,j,k;

	//<i,j> -> {<j,1>+<j,2>+<j,3>,...............,<j,n>}
	//{<1,1> + <2,1> + .... + <n,1>}-> {1,..|{1,2,...n}}
	
	cin >> n;
	ll x[n];

	int col[60];
	memset(col,0,sizeof(col));

	for(i=0;i<n;i++){
		cin >> x[i];
		for(j=0;j<60;++j){
			if(x[i]&(1LL<<j))
				++col[j];
		}
	}
	ll ans=0;
	for(i=0;i<n;i++){
		ll O=0,A=0;
		for(j=0;j<60;++j){
			if(x[i]&(1LL<<j)){
				O = add(O,mul(1LL*n,(1LL<<j)));
				A = add(A,mul(1LL*col[j],(1LL<<j)));
			}
			else
				O = add(O,mul(1LL*col[j],(1LL<<j)));
		}
		ans = add(ans,mul(O,A));
	}
	cout<<ans<<"\n";
}
// Entry point: enables fast I/O, then reads the test-case count and
// processes that many cases with run_case().
int main() {
	fast;
	int t;
	cin >> t;
	while (t != 0) {
		run_case();
		--t;
	}
	return 0;
}

GNU C++17(64 bit) -> 453 ms

GNU C++17 ->1263 ms

I would still like to understand why the running time differs so much between the two compilers above.

Thanks!

Comments (2)

Show archived | Write comment?

rabbitsthecat

3 years ago, # |

I can't help you, but just wanted to let you know for some reason your code in 64-bit G++17 passes in 1.2 seconds.

→ Reply

LuchkinVyacheslav

Might be related to the fact that you're converting 5*10^5 numbers to binary by making 60 push_backs to an std::vector<long long> (causing it to reallocate and copy its contents multiple times), then copying them back to memory using 8 bytes per bit (later only accessing them once to count numbers with a given bit set), and then inefficiently converting the same 5*10^5 numbers to binary again, while converting a number to binary normally requires no action at all.

#	User	Rating
1	tourist	3690
2	jiangly	3647
3	Benq	3581
4	orzdevinwang	3570
5	Geothermal	3569
5	cnnfls_csy	3569
7	Radewoosh	3509
8	ecnerwala	3486
9	jqdai0815	3474
10	gyh20	3447

#	User	Contrib.
1	maomao90	174
2	awoo	165
3	adamant	161
4	TheScrasse	160
5	nor	158
6	maroonrk	156
7	-is-this-fft-	152
8	orz	146
9	SecondThread	145
9	pajenegod	145

Codeforcer's blog