Statistics
| Revision:

root / wattch / power.c @ 52

History | View | Annotate | Download (103 KB)

1
/* I included this copyright since we're using Cacti for some stuff */
2

    
3
/*------------------------------------------------------------
4
 *  Copyright 1994 Digital Equipment Corporation and Steve Wilton
5
 *                         All Rights Reserved
6
 *
7
 * Permission to use, copy, and modify this software and its documentation is
8
 * hereby granted only under the following terms and conditions.  Both the
9
 * above copyright notice and this permission notice must appear in all copies
10
 * of the software, derivative works or modified versions, and any portions
11
 * thereof, and both notices must appear in supporting documentation.
12
 *
13
 * Users of this software agree to the terms and conditions set forth herein,
14
 * and hereby grant back to Digital a non-exclusive, unrestricted, royalty-
15
 * free right and license under any changes, enhancements or extensions
16
 * made to the core functions of the software, including but not limited to
17
 * those affording compatibility with other hardware or software
18
 * environments, but excluding applications which incorporate this software.
19
 * Users further agree to use their best efforts to return to Digital any
20
 * such changes, enhancements or extensions that they make and inform Digital
21
 * of noteworthy uses of this software.  Correspondence should be provided
22
 * to Digital at:
23
 *
24
 *                       Director of Licensing
25
 *                       Western Research Laboratory
26
 *                       Digital Equipment Corporation
27
 *                       100 Hamilton Avenue
28
 *                       Palo Alto, California  94301
29
 *
30
 * This software may be distributed (but not offered for sale or transferred
31
 * for compensation) to third parties, provided such third parties agree to
32
 * abide by the terms and conditions of this notice.
33
 *
34
 * THE SOFTWARE IS PROVIDED "AS IS" AND DIGITAL EQUIPMENT CORP. DISCLAIMS ALL
35
 * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES
36
 * OF MERCHANTABILITY AND FITNESS.   IN NO EVENT SHALL DIGITAL EQUIPMENT
37
 * CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
38
 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
39
 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
40
 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
41
 * SOFTWARE.
42
 *------------------------------------------------------------*/
43

    
44
#include <math.h>
45
#include "power.h"
46
#include "machine.h"
47
#include "cache.h"
48
#include "sim.h"
49
#include <assert.h>
50

    
51
//#define SensePowerfactor (Mhz)*(Vdd/2)*(Vdd/2)
52
//#define Sense2Powerfactor (Mhz)*(2*.3+.1*Vdd)
53
//#define Powerfactor (Mhz)*Vdd*Vdd
54
//#define LowSwingPowerfactor (Mhz)*.2*.2
55
/* scale factor for crossover (Vdd -> GND) currents */
double crossover_scaling = 1.2;

/* non-ideal turnoff: fraction of a unit's power that is still charged in a
   cycle where the unit is gated off */
double turnoff_factor = 0.1;

#define MSCALE (LSCALE * .624 / .2250)
61

    
62
/*----------------------------------------------------------------------*/
63

    
64
/* static power model results */
65
power_result_type power;
66

    
67
/*
 * Return 2 raised to the power x, as an int.
 *
 * The value is computed with integer arithmetic only, so it does not depend
 * on floating-point pow() and never converts an out-of-range double to int
 * (which is undefined behavior).
 *
 *   x < 0             -> 0 (the truncated value of the fractional result)
 *   result too large  -> saturates at the largest representable int
 */
int pow2(int x)
{
  /* largest positive int; assumes int has no padding bits */
  const int int_max = (int)(~0u >> 1);
  int result = 1;

  if (x < 0)
    return 0;

  while (x-- > 0) {
    /* doubling would overflow: clamp instead */
    if (result > int_max / 2)
      return int_max;
    result *= 2;
  }
  return result;
}
70

    
71
double logfour(x)
72
     double x;
73
{
74
  if (x<=0) fprintf(stderr,"%e\n",x);
75
  return( (double) (log(x)/log(4.0)) );
76
}
77

    
78
/* safer pop count to validate the fast algorithm */
79
int pop_count_slow(bquad_t bits)
80
{
81
  int count = 0; 
82
  bquad_t tmpbits = bits; 
83
  while (tmpbits) { 
84
    if (tmpbits & 1) ++count; 
85
    tmpbits >>= 1; 
86
  } 
87
  return count; 
88
}
89

    
90
/* fast pop count */
91
int pop_count(bquad_t bits)
92
{
93
#define T unsigned long long
94
#define ONES ((T)(-1)) 
95
#define TWO(k) ((T)1 << (k)) 
96
#define CYCL(k) (ONES/(1 + (TWO(TWO(k))))) 
97
#define BSUM(x,k) ((x)+=(x) >> TWO(k), (x) &= CYCL(k)) 
98
  bquad_t x = bits; 
99
  x = (x & CYCL(0)) + ((x>>TWO(0)) & CYCL(0)); 
100
  x = (x & CYCL(1)) + ((x>>TWO(1)) & CYCL(1)); 
101
  BSUM(x,2); 
102
  BSUM(x,3); 
103
  BSUM(x,4); 
104
  BSUM(x,5); 
105
  return x; 
106
}
107

    
108

    
109
/* instruction field sizes used by the power model (presumably in bits --
   TODO confirm against their users) */
int opcode_length = 8;
int inst_length = 32;

/* machine configuration, defined outside this file */
extern int ruu_decode_width, ruu_issue_width, ruu_commit_width;
extern int RUU_size, LSQ_size;
extern int data_width;
extern int res_ialu, res_fpalu, res_memport;

int nvreg_width;
int npreg_width;

extern int bimod_config[];

/* caches */
extern struct cache_t *cache_dl1;
extern struct cache_t *cache_il1;
extern struct cache_t *cache_dl2;

/* TLBs */
extern struct cache_t *dtlb;
extern struct cache_t *itlb;

/* 2-level predictor config (<l1size> <l2size> <hist_size> <xor>) */
extern int twolev_config[];

/* combining predictor config (<meta_table_size>) */
extern int comb_config[];

/* return address stack (RAS) size */
extern int ras_size;

/* BTB predictor config (<num_sets> <associativity>) */
extern int btb_config[];

double global_clockcap;
147

    
148
/* per-unit power accumulated every cycle regardless of activity */
static double rename_power = 0, bpred_power = 0, window_power = 0;
static double lsq_power = 0, regfile_power = 0;
static double icache_power = 0, dcache_power = 0, dcache2_power = 0;
static double alu_power = 0, falu_power = 0;
static double resultbus_power = 0, clock_power = 0;

/* per-unit power accumulated under clock-gating style 1 */
static double rename_power_cc1 = 0, bpred_power_cc1 = 0, window_power_cc1 = 0;
static double lsq_power_cc1 = 0, regfile_power_cc1 = 0;
static double icache_power_cc1 = 0, dcache_power_cc1 = 0, dcache2_power_cc1 = 0;
static double alu_power_cc1 = 0, resultbus_power_cc1 = 0, clock_power_cc1 = 0;

/* per-unit power accumulated under clock-gating style 2 */
static double rename_power_cc2 = 0, bpred_power_cc2 = 0, window_power_cc2 = 0;
static double lsq_power_cc2 = 0, regfile_power_cc2 = 0;
static double icache_power_cc2 = 0, dcache_power_cc2 = 0, dcache2_power_cc2 = 0;
static double alu_power_cc2 = 0, resultbus_power_cc2 = 0, clock_power_cc2 = 0;

/* per-unit power accumulated under clock-gating style 3 */
static double rename_power_cc3 = 0, bpred_power_cc3 = 0, window_power_cc3 = 0;
static double lsq_power_cc3 = 0, regfile_power_cc3 = 0;
static double icache_power_cc3 = 0, dcache_power_cc3 = 0, dcache2_power_cc3 = 0;
static double alu_power_cc3 = 0, resultbus_power_cc3 = 0, clock_power_cc3 = 0;

/* running totals over all units, one per gating style */
static double total_cycle_power;
static double total_cycle_power_cc1, total_cycle_power_cc2, total_cycle_power_cc3;

/* accumulated parasitic losses, one per gating style */
static double total_parasitic_cc1 = 0.0;
static double total_parasitic_cc2 = 0.0;
static double total_parasitic_cc3 = 0.0;
#define PARASITIC_OHM 0.002

/* extremes of the per-cycle current seen so far */
static double max_amp = 0.00;
static double min_amp = 1000.00;

/* off-chip loss table: one entry per 0.5 A step of load current, two entries
   per row; the final entry covers 13 A and above */
static double offchip_ploss[] = {
  0.5, 0.5,        /*  0 -  1 A */
  0.5, 0.5,        /*  1 -  2 A */
  0.5, 0.5,        /*  2 -  3 A */
  0.6, 0.7,        /*  3 -  4 A */
  0.8, 0.9,        /*  4 -  5 A */
  1.0, 1.1,        /*  5 -  6 A */
  1.2, 1.3,        /*  6 -  7 A */
  1.5, 1.6,        /*  7 -  8 A */
  1.8, 2.0,        /*  8 -  9 A */
  2.2, 2.4,        /*  9 - 10 A */
  2.6, 2.8,        /* 10 - 11 A */
  3.0, 3.3,        /* 11 - 12 A */
  3.6, 3.9,        /* 12 - 13 A */
  4.0              /* 13 A and above */
};

/* totals as of the previous cycle, and the resulting single-cycle deltas */
static double last_single_total_cycle_power_cc1 = 0.0;
static double last_single_total_cycle_power_cc2 = 0.0;
static double last_single_total_cycle_power_cc3 = 0.0;
static double current_total_cycle_power_cc1;
static double current_total_cycle_power_cc2;
static double current_total_cycle_power_cc3;

/* voltage/frequency switching state */
static double last_sim_num_insn = 0, last_sim_total_insn = 0;
static double diff_dispatch = 0, diff_commit = 0;
static int speed_grade = 1, last_speed_grade = 1;
static double diff_dispatch_sum = 0, diff_commit_sum = 0;
static int init_count = 0;
//#define DVFS_FIX
#define SUM_OVER 50000 // longer time = more power consumed
static double hist_dispatch[SUM_OVER];
static double hist_commit[SUM_OVER];
static int hist_idx = 0;
static double slow_cycles = 0, fast_cycles = 0;
static double last_switch_time = 0;
static double cycle_count = 0;
#define SWITCH_CYCLES 30
static int speed_delay[SWITCH_CYCLES];
#define ONCHIP_VREG_LOSS_LOW 0.220
#define ONCHIP_VREG_LOSS_HIGH 0.120

/* largest single-cycle power seen so far, one per gating style */
static double max_cycle_power_cc1 = 0.0;
static double max_cycle_power_cc2 = 0.0;
static double max_cycle_power_cc3 = 0.0;
255

    
256
/* per-cycle access counters, defined outside this file */
extern counter_t rename_access, bpred_access, window_access, lsq_access;
extern counter_t regfile_access, icache_access, dcache_access, dcache2_access;
extern counter_t alu_access, ialu_access, falu_access;
extern counter_t resultbus_access;

/* finer-grained window / LSQ access counters */
extern counter_t window_selection_access, window_wakeup_access, window_preg_access;
extern counter_t lsq_preg_access, lsq_wakeup_access;
extern counter_t lsq_store_data_access, lsq_load_data_access;

/* population-count totals and sample counts, consumed by compute_af() */
extern counter_t window_total_pop_count_cycle, window_num_pop_count_cycle;
extern counter_t lsq_total_pop_count_cycle, lsq_num_pop_count_cycle;
extern counter_t regfile_total_pop_count_cycle, regfile_num_pop_count_cycle;
extern counter_t resultbus_total_pop_count_cycle, resultbus_num_pop_count_cycle;

/* accesses summed over all cycles */
static counter_t total_rename_access = 0, total_bpred_access = 0;
static counter_t total_window_access = 0, total_lsq_access = 0;
static counter_t total_regfile_access = 0, total_icache_access = 0;
static counter_t total_dcache_access = 0, total_dcache2_access = 0;
static counter_t total_alu_access = 0, total_resultbus_access = 0;

/* largest number of accesses seen in any single cycle */
static counter_t max_rename_access, max_bpred_access;
static counter_t max_window_access, max_lsq_access;
static counter_t max_regfile_access, max_icache_access;
static counter_t max_dcache_access, max_dcache2_access;
static counter_t max_alu_access, max_resultbus_access;
307

    
308
void clear_access_stats()
309
{
310
  rename_access=0;
311
  bpred_access=0;
312
  window_access=0;
313
  lsq_access=0;
314
  regfile_access=0;
315
  icache_access=0;
316
  dcache_access=0;
317
  dcache2_access=0;
318
  alu_access=0;
319
  ialu_access=0;
320
  falu_access=0;
321
  resultbus_access=0;
322

    
323
  window_preg_access=0;
324
  window_selection_access=0;
325
  window_wakeup_access=0;
326
  lsq_store_data_access=0;
327
  lsq_load_data_access=0;
328
  lsq_wakeup_access=0;
329
  lsq_preg_access=0;
330

    
331
  window_total_pop_count_cycle=0;
332
  window_num_pop_count_cycle=0;
333
  lsq_total_pop_count_cycle=0;
334
  lsq_num_pop_count_cycle=0;
335
  regfile_total_pop_count_cycle=0;
336
  regfile_num_pop_count_cycle=0;
337
  resultbus_total_pop_count_cycle=0;
338
  resultbus_num_pop_count_cycle=0;
339
}
340

    
341
/* compute bitline activity factors which we use to scale bitline power 
342
   Here it is very important whether we assume 0's or 1's are
343
   responsible for dissipating power in pre-charged stuctures. (since
344
   most of the bits are 0's, we assume the design is power-efficient
345
   enough to allow 0's to _not_ discharge 
346
*/
347
double compute_af(counter_t num_pop_count_cycle,counter_t total_pop_count_cycle,int pop_width) {
348
  double avg_pop_count;
349
  double af,af_b;
350

    
351
  if(num_pop_count_cycle)
352
    avg_pop_count = (double)total_pop_count_cycle / (double)num_pop_count_cycle;
353
  else
354
    avg_pop_count = 0;
355

    
356
  af = avg_pop_count / (double)pop_width;
357
  
358
  af_b = 1.0 - af;
359

    
360
  /*  printf("af == %f%%, af_b == %f%%, total_pop == %d, num_pop == %d\n",100*af,100*af_b,total_pop_count_cycle,num_pop_count_cycle); */
361

    
362
  return(af_b);
363
}
364

    
365
/* compute power statistics on each cycle, for each conditional clocking style.  Obviously
366
most of the speed penalty comes here, so if you don't want per-cycle power estimates
367
you could post-process 
368

369
See README.wattch for details on the various clock gating styles.
370

371
*/
372
void update_power_stats()
373
{
374
  double window_af_b, lsq_af_b, regfile_af_b, resultbus_af_b;
375
  double current;
376
  int speed_idx;
377

    
378
#ifdef DYNAMIC_AF
379
  window_af_b = compute_af(window_num_pop_count_cycle,window_total_pop_count_cycle,data_width);
380
  lsq_af_b = compute_af(lsq_num_pop_count_cycle,lsq_total_pop_count_cycle,data_width);
381
  regfile_af_b = compute_af(regfile_num_pop_count_cycle,regfile_total_pop_count_cycle,data_width);
382
  resultbus_af_b = compute_af(resultbus_num_pop_count_cycle,resultbus_total_pop_count_cycle,data_width);
383
#endif
384
  
385
  rename_power+=power.rename_power;
386
  bpred_power+=power.bpred_power;
387
  window_power+=power.window_power;
388
  lsq_power+=power.lsq_power;
389
  regfile_power+=power.regfile_power;
390
  icache_power+=power.icache_power+power.itlb;
391
  dcache_power+=power.dcache_power+power.dtlb;
392
  dcache2_power+=power.dcache2_power;
393
  alu_power+=power.ialu_power + power.falu_power;
394
  falu_power+=power.falu_power;
395
  resultbus_power+=power.resultbus;
396
  clock_power+=power.clock_power;
397

    
398
  total_rename_access+=rename_access;
399
  total_bpred_access+=bpred_access;
400
  total_window_access+=window_access;
401
  total_lsq_access+=lsq_access;
402
  total_regfile_access+=regfile_access;
403
  total_icache_access+=icache_access;
404
  total_dcache_access+=dcache_access;
405
  total_dcache2_access+=dcache2_access;
406
  total_alu_access+=alu_access;
407
  total_resultbus_access+=resultbus_access;
408

    
409
  max_rename_access=MAX(rename_access,max_rename_access);
410
  max_bpred_access=MAX(bpred_access,max_bpred_access);
411
  max_window_access=MAX(window_access,max_window_access);
412
  max_lsq_access=MAX(lsq_access,max_lsq_access);
413
  max_regfile_access=MAX(regfile_access,max_regfile_access);
414
  max_icache_access=MAX(icache_access,max_icache_access);
415
  max_dcache_access=MAX(dcache_access,max_dcache_access);
416
  max_dcache2_access=MAX(dcache2_access,max_dcache2_access);
417
  max_alu_access=MAX(alu_access,max_alu_access);
418
  max_resultbus_access=MAX(resultbus_access,max_resultbus_access);
419
      
420
  if(rename_access) {
421
    rename_power_cc1+=power.rename_power;
422
    rename_power_cc2+=((double)rename_access/(double)ruu_decode_width)*power.rename_power;
423
    rename_power_cc3+=((double)rename_access/(double)ruu_decode_width)*power.rename_power;
424
  }
425
  else 
426
    rename_power_cc3+=turnoff_factor*power.rename_power;
427

    
428
  if(bpred_access) {
429
    if(bpred_access <= 2)
430
      bpred_power_cc1+=power.bpred_power;
431
    else
432
      bpred_power_cc1+=((double)bpred_access/2.0) * power.bpred_power;
433
    bpred_power_cc2+=((double)bpred_access/2.0) * power.bpred_power;
434
    bpred_power_cc3+=((double)bpred_access/2.0) * power.bpred_power;
435
  }
436
  else
437
    bpred_power_cc3+=turnoff_factor*power.bpred_power;
438

    
439
#ifdef STATIC_AF
440
  if(window_preg_access) {
441
    if(window_preg_access <= 3*ruu_issue_width)
442
      window_power_cc1+=power.rs_power;
443
    else
444
      window_power_cc1+=((double)window_preg_access/(3.0*(double)ruu_issue_width))*power.rs_power;
445
    window_power_cc2+=((double)window_preg_access/(3.0*(double)ruu_issue_width))*power.rs_power;
446
    window_power_cc3+=((double)window_preg_access/(3.0*(double)ruu_issue_width))*power.rs_power;
447
  }
448
  else
449
    window_power_cc3+=turnoff_factor*power.rs_power;
450
#elif defined(DYNAMIC_AF)
451
  if(window_preg_access) {
452
    if(window_preg_access <= 3*ruu_issue_width)
453
      window_power_cc1+=power.rs_power_nobit + window_af_b*power.rs_bitline;
454
    else
455
      window_power_cc1+=((double)window_preg_access/(3.0*(double)ruu_issue_width))*(power.rs_power_nobit + window_af_b*power.rs_bitline);
456
    window_power_cc2+=((double)window_preg_access/(3.0*(double)ruu_issue_width))*(power.rs_power_nobit + window_af_b*power.rs_bitline);
457
    window_power_cc3+=((double)window_preg_access/(3.0*(double)ruu_issue_width))*(power.rs_power_nobit + window_af_b*power.rs_bitline);
458
  }
459
  else
460
    window_power_cc3+=turnoff_factor*power.rs_power;
461
#else
462
  panic("no AF-style defined\n");
463
#endif
464

    
465
  if(window_selection_access) {
466
    if(window_selection_access <= ruu_issue_width)
467
      window_power_cc1+=power.selection;
468
    else
469
      window_power_cc1+=((double)window_selection_access/((double)ruu_issue_width))*power.selection;
470
    window_power_cc2+=((double)window_selection_access/((double)ruu_issue_width))*power.selection;
471
    window_power_cc3+=((double)window_selection_access/((double)ruu_issue_width))*power.selection;
472
  }
473
  else
474
    window_power_cc3+=turnoff_factor*power.selection;
475

    
476
  if(window_wakeup_access) {
477
    if(window_wakeup_access <= ruu_issue_width)
478
      window_power_cc1+=power.wakeup_power;
479
    else
480
      window_power_cc1+=((double)window_wakeup_access/((double)ruu_issue_width))*power.wakeup_power;
481
    window_power_cc2+=((double)window_wakeup_access/((double)ruu_issue_width))*power.wakeup_power;
482
    window_power_cc3+=((double)window_wakeup_access/((double)ruu_issue_width))*power.wakeup_power;
483
  }
484
  else
485
    window_power_cc3+=turnoff_factor*power.wakeup_power;
486

    
487
  if(lsq_wakeup_access) {
488
    if(lsq_wakeup_access <= res_memport)
489
      lsq_power_cc1+=power.lsq_wakeup_power;
490
    else
491
      lsq_power_cc1+=((double)lsq_wakeup_access/((double)res_memport))*power.lsq_wakeup_power;
492
    lsq_power_cc2+=((double)lsq_wakeup_access/((double)res_memport))*power.lsq_wakeup_power;
493
    lsq_power_cc3+=((double)lsq_wakeup_access/((double)res_memport))*power.lsq_wakeup_power;
494
  }
495
  else
496
    lsq_power_cc3+=turnoff_factor*power.lsq_wakeup_power;
497

    
498
#ifdef STATIC_AF
499
  if(lsq_preg_access) {
500
    if(lsq_preg_access <= res_memport)
501
      lsq_power_cc1+=power.lsq_rs_power;
502
    else
503
      lsq_power_cc1+=((double)lsq_preg_access/((double)res_memport))*power.lsq_rs_power;
504
    lsq_power_cc2+=((double)lsq_preg_access/((double)res_memport))*power.lsq_rs_power;
505
    lsq_power_cc3+=((double)lsq_preg_access/((double)res_memport))*power.lsq_rs_power;
506
  }
507
  else
508
    lsq_power_cc3+=turnoff_factor*power.lsq_rs_power;
509
#else
510
  if(lsq_preg_access) {
511
    if(lsq_preg_access <= res_memport)
512
      lsq_power_cc1+=power.lsq_rs_power_nobit + lsq_af_b*power.lsq_rs_bitline;
513
    else
514
      lsq_power_cc1+=((double)lsq_preg_access/((double)res_memport))*(power.lsq_rs_power_nobit + lsq_af_b*power.lsq_rs_bitline);
515
    lsq_power_cc2+=((double)lsq_preg_access/((double)res_memport))*(power.lsq_rs_power_nobit + lsq_af_b*power.lsq_rs_bitline);
516
    lsq_power_cc3+=((double)lsq_preg_access/((double)res_memport))*(power.lsq_rs_power_nobit + lsq_af_b*power.lsq_rs_bitline);
517
  }
518
  else
519
    lsq_power_cc3+=turnoff_factor*power.lsq_rs_power;
520
#endif
521

    
522
#ifdef STATIC_AF
523
  if(regfile_access) {
524
    if(regfile_access <= (3.0*ruu_commit_width))
525
      regfile_power_cc1+=power.regfile_power;
526
    else
527
      regfile_power_cc1+=((double)regfile_access/(3.0*(double)ruu_commit_width))*power.regfile_power;
528
    regfile_power_cc2+=((double)regfile_access/(3.0*(double)ruu_commit_width))*power.regfile_power;
529
    regfile_power_cc3+=((double)regfile_access/(3.0*(double)ruu_commit_width))*power.regfile_power;
530
  }
531
  else
532
    regfile_power_cc3+=turnoff_factor*power.regfile_power;
533
#else
534
  if(regfile_access) {
535
    if(regfile_access <= (3.0*ruu_commit_width))
536
      regfile_power_cc1+=power.regfile_power_nobit + regfile_af_b*power.regfile_bitline;
537
    else
538
      regfile_power_cc1+=((double)regfile_access/(3.0*(double)ruu_commit_width))*(power.regfile_power_nobit + regfile_af_b*power.regfile_bitline);
539
    regfile_power_cc2+=((double)regfile_access/(3.0*(double)ruu_commit_width))*(power.regfile_power_nobit + regfile_af_b*power.regfile_bitline);
540
    regfile_power_cc3+=((double)regfile_access/(3.0*(double)ruu_commit_width))*(power.regfile_power_nobit + regfile_af_b*power.regfile_bitline);
541
  }
542
  else
543
    regfile_power_cc3+=turnoff_factor*power.regfile_power;
544
#endif
545

    
546
  if(icache_access) {
547
    /* don't scale icache because we assume 1 line is fetched, unless fetch stalls */
548
    icache_power_cc1+=power.icache_power+power.itlb;
549
    icache_power_cc2+=power.icache_power+power.itlb;
550
    icache_power_cc3+=power.icache_power+power.itlb;
551
  }
552
  else
553
    icache_power_cc3+=turnoff_factor*(power.icache_power+power.itlb);
554

    
555
  if(dcache_access) {
556
    if(dcache_access <= res_memport)
557
      dcache_power_cc1+=power.dcache_power+power.dtlb;
558
    else
559
      dcache_power_cc1+=((double)dcache_access/(double)res_memport)*(power.dcache_power +
560
                                                     power.dtlb);
561
    dcache_power_cc2+=((double)dcache_access/(double)res_memport)*(power.dcache_power +
562
                                                   power.dtlb);
563
    dcache_power_cc3+=((double)dcache_access/(double)res_memport)*(power.dcache_power +
564
                                                   power.dtlb);
565
  }
566
  else
567
    dcache_power_cc3+=turnoff_factor*(power.dcache_power+power.dtlb);
568

    
569
  if(dcache2_access) {
570
    if(dcache2_access <= res_memport)
571
      dcache2_power_cc1+=power.dcache2_power;
572
    else
573
      dcache2_power_cc1+=((double)dcache2_access/(double)res_memport)*power.dcache2_power;
574
    dcache2_power_cc2+=((double)dcache2_access/(double)res_memport)*power.dcache2_power;
575
    dcache2_power_cc3+=((double)dcache2_access/(double)res_memport)*power.dcache2_power;
576
  }
577
  else
578
    dcache2_power_cc3+=turnoff_factor*power.dcache2_power;
579

    
580
  if(alu_access) {
581
    if(ialu_access)
582
      alu_power_cc1+=power.ialu_power;
583
    else
584
      alu_power_cc3+=turnoff_factor*power.ialu_power;
585
    if(falu_access)
586
      alu_power_cc1+=power.falu_power;
587
    else
588
      alu_power_cc3+=turnoff_factor*power.falu_power;
589

    
590
    alu_power_cc2+=((double)ialu_access/(double)res_ialu)*power.ialu_power +
591
      ((double)falu_access/(double)res_fpalu)*power.falu_power;
592
    alu_power_cc3+=((double)ialu_access/(double)res_ialu)*power.ialu_power +
593
      ((double)falu_access/(double)res_fpalu)*power.falu_power;
594
  }
595
  else
596
    alu_power_cc3+=turnoff_factor*(power.ialu_power + power.falu_power);
597

    
598
#ifdef STATIC_AF
599
  if(resultbus_access) {
600
    assert(ruu_issue_width != 0);
601
    if(resultbus_access <= ruu_issue_width) {
602
      resultbus_power_cc1+=power.resultbus;
603
    }
604
    else {
605
      resultbus_power_cc1+=((double)resultbus_access/(double)ruu_issue_width)*power.resultbus;
606
    }
607
    resultbus_power_cc2+=((double)resultbus_access/(double)ruu_issue_width)*power.resultbus;
608
    resultbus_power_cc3+=((double)resultbus_access/(double)ruu_issue_width)*power.resultbus;
609
  }
610
  else
611
    resultbus_power_cc3+=turnoff_factor*power.resultbus;
612
#else
613
  if(resultbus_access) {
614
    assert(ruu_issue_width != 0);
615
    if(resultbus_access <= ruu_issue_width) {
616
      resultbus_power_cc1+=resultbus_af_b*power.resultbus;
617
    }
618
    else {
619
      resultbus_power_cc1+=((double)resultbus_access/(double)ruu_issue_width)*resultbus_af_b*power.resultbus;
620
    }
621
    resultbus_power_cc2+=((double)resultbus_access/(double)ruu_issue_width)*resultbus_af_b*power.resultbus;
622
    resultbus_power_cc3+=((double)resultbus_access/(double)ruu_issue_width)*resultbus_af_b*power.resultbus;
623
  }
624
  else
625
    resultbus_power_cc3+=turnoff_factor*power.resultbus;
626
#endif
627

    
628
  total_cycle_power = rename_power + bpred_power + window_power + 
629
    lsq_power + regfile_power + icache_power + dcache_power +
630
    alu_power + resultbus_power;
631

    
632
  total_cycle_power_cc1 = rename_power_cc1 + bpred_power_cc1 + 
633
    window_power_cc1 + lsq_power_cc1 + regfile_power_cc1 + 
634
    icache_power_cc1 + dcache_power_cc1 + alu_power_cc1 + 
635
    resultbus_power_cc1;
636

    
637
  total_cycle_power_cc2 = rename_power_cc2 + bpred_power_cc2 + 
638
    window_power_cc2 + lsq_power_cc2 + regfile_power_cc2 + 
639
    icache_power_cc2 + dcache_power_cc2 + alu_power_cc2 + 
640
    resultbus_power_cc2;
641

    
642
  total_cycle_power_cc3 = rename_power_cc3 + bpred_power_cc3 + 
643
    window_power_cc3 + lsq_power_cc3 + regfile_power_cc3 + 
644
    icache_power_cc3 + dcache_power_cc3 + alu_power_cc3 + 
645
    resultbus_power_cc3;
646

    
647
  clock_power_cc1+=power.clock_power*(total_cycle_power_cc1/total_cycle_power);
648
  clock_power_cc2+=power.clock_power*(total_cycle_power_cc2/total_cycle_power);
649
  clock_power_cc3+=power.clock_power*(total_cycle_power_cc3/total_cycle_power);
650

    
651
  total_cycle_power_cc1 += clock_power_cc1;
652
  total_cycle_power_cc2 += clock_power_cc2;
653
  total_cycle_power_cc3 += clock_power_cc3;
654

    
655
  current_total_cycle_power_cc1 = total_cycle_power_cc1
656
    -last_single_total_cycle_power_cc1;
657
  current_total_cycle_power_cc2 = total_cycle_power_cc2
658
    -last_single_total_cycle_power_cc2;
659
  current_total_cycle_power_cc3 = total_cycle_power_cc3
660
    -last_single_total_cycle_power_cc3;
661

    
662
   current = current_total_cycle_power_cc3 / Vdd;
663

    
664
  if (max_amp < current ) {
665
      max_amp = current ;
666
  }
667

    
668
  if (min_amp > current) {
669
      min_amp = current;
670
  }
671

    
672
  if (current < 0.5) {
673
      total_parasitic_cc1 += offchip_ploss[0];
674
      total_parasitic_cc2 += offchip_ploss[0];
675
      total_parasitic_cc3 += offchip_ploss[0];
676
  } else if (current < 1) {
677
      total_parasitic_cc1 += offchip_ploss[1];
678
      total_parasitic_cc2 += offchip_ploss[1];
679
      total_parasitic_cc3 += offchip_ploss[1];
680
  } else if (current < 1.5) {
681
      total_parasitic_cc1 += offchip_ploss[2];
682
      total_parasitic_cc2 += offchip_ploss[2];
683
      total_parasitic_cc3 += offchip_ploss[2];
684
  } else if (current < 2) {
685
      total_parasitic_cc1 += offchip_ploss[3];
686
      total_parasitic_cc2 += offchip_ploss[3];
687
      total_parasitic_cc3 += offchip_ploss[3];
688
  } else if (current < 2.5) {
689
      total_parasitic_cc1 += offchip_ploss[4];
690
      total_parasitic_cc2 += offchip_ploss[4];
691
      total_parasitic_cc3 += offchip_ploss[4];
692
  } else if (current < 3) {
693
      total_parasitic_cc1 += offchip_ploss[5];
694
      total_parasitic_cc2 += offchip_ploss[5];
695
      total_parasitic_cc3 += offchip_ploss[5];
696
  } else if (current < 3.5) {
697
      total_parasitic_cc1 += offchip_ploss[6];
698
      total_parasitic_cc2 += offchip_ploss[6];
699
      total_parasitic_cc3 += offchip_ploss[6];
700
  } else if (current < 4) {
701
      total_parasitic_cc1 += offchip_ploss[7];
702
      total_parasitic_cc2 += offchip_ploss[7];
703
      total_parasitic_cc3 += offchip_ploss[7];
704
  } else if (current < 4.5) {
705
      total_parasitic_cc1 += offchip_ploss[8];
706
      total_parasitic_cc2 += offchip_ploss[8];
707
      total_parasitic_cc3 += offchip_ploss[8];
708
  } else if (current < 5) {
709
      total_parasitic_cc1 += offchip_ploss[9];
710
      total_parasitic_cc2 += offchip_ploss[9];
711
      total_parasitic_cc3 += offchip_ploss[9];
712
  } else if (current < 5.5) {
713
      total_parasitic_cc1 += offchip_ploss[10];
714
      total_parasitic_cc2 += offchip_ploss[10];
715
      total_parasitic_cc3 += offchip_ploss[10];
716
  } else if (current < 6) {
717
      total_parasitic_cc1 += offchip_ploss[11];
718
      total_parasitic_cc2 += offchip_ploss[11];
719
      total_parasitic_cc3 += offchip_ploss[11];
720
  } else if (current < 6.5) {
721
      total_parasitic_cc1 += offchip_ploss[12];
722
      total_parasitic_cc2 += offchip_ploss[12];
723
      total_parasitic_cc3 += offchip_ploss[12];
724
  } else if (current < 7) {
725
      total_parasitic_cc1 += offchip_ploss[13];
726
      total_parasitic_cc2 += offchip_ploss[13];
727
      total_parasitic_cc3 += offchip_ploss[13];
728
  } else if (current < 7.5) {
729
      total_parasitic_cc1 += offchip_ploss[14];
730
      total_parasitic_cc2 += offchip_ploss[14];
731
      total_parasitic_cc3 += offchip_ploss[14];
732
  } else if (current < 8) {
733
      total_parasitic_cc1 += offchip_ploss[15];
734
      total_parasitic_cc2 += offchip_ploss[15];
735
      total_parasitic_cc3 += offchip_ploss[15];
736
  } else if (current < 8.5) {
737
      total_parasitic_cc1 += offchip_ploss[16];
738
      total_parasitic_cc2 += offchip_ploss[16];
739
      total_parasitic_cc3 += offchip_ploss[16];
740
  } else if (current < 9) {
741
      total_parasitic_cc1 += offchip_ploss[17];
742
      total_parasitic_cc2 += offchip_ploss[17];
743
      total_parasitic_cc3 += offchip_ploss[17];
744
  } else if (current < 9.5) {
745
      total_parasitic_cc1 += offchip_ploss[18];
746
      total_parasitic_cc2 += offchip_ploss[18];
747
      total_parasitic_cc3 += offchip_ploss[18];
748
  } else if (current < 10) {
749
      total_parasitic_cc1 += offchip_ploss[19];
750
      total_parasitic_cc2 += offchip_ploss[19];
751
      total_parasitic_cc3 += offchip_ploss[19];
752
  } else if (current < 10.5) {
753
      total_parasitic_cc1 += offchip_ploss[20];
754
      total_parasitic_cc2 += offchip_ploss[20];
755
      total_parasitic_cc3 += offchip_ploss[20];
756
  } else if (current < 11) {
757
      total_parasitic_cc1 += offchip_ploss[21];
758
      total_parasitic_cc2 += offchip_ploss[21];
759
      total_parasitic_cc3 += offchip_ploss[21];
760
  } else if (current < 11.5) {
761
      total_parasitic_cc1 += offchip_ploss[22];
762
      total_parasitic_cc2 += offchip_ploss[22];
763
      total_parasitic_cc3 += offchip_ploss[22];
764
  } else if (current < 12) {
765
      total_parasitic_cc1 += offchip_ploss[23];
766
      total_parasitic_cc2 += offchip_ploss[23];
767
      total_parasitic_cc3 += offchip_ploss[23];
768
  } else if (current < 12.5) {
769
      total_parasitic_cc1 += offchip_ploss[24];
770
      total_parasitic_cc2 += offchip_ploss[24];
771
      total_parasitic_cc3 += offchip_ploss[24];
772
  } else if (current < 13) {
773
      total_parasitic_cc1 += offchip_ploss[25];
774
      total_parasitic_cc2 += offchip_ploss[25];
775
      total_parasitic_cc3 += offchip_ploss[25];
776
  } else {
777
      total_parasitic_cc1 += offchip_ploss[26];
778
      total_parasitic_cc2 += offchip_ploss[26];
779
      total_parasitic_cc3 += offchip_ploss[26];
780
  }
781

    
782
  total_parasitic_cc1 += pow(current, 2) * PARASITIC_OHM;
783
  total_parasitic_cc2 += pow(current, 2) * PARASITIC_OHM;
784
  total_parasitic_cc3 += pow(current, 2) * PARASITIC_OHM;
785

    
786
  // Onchip regulator paraisitc loss
787
  if (speed_grade == 0) {
788
      total_parasitic_cc1 += ONCHIP_VREG_LOSS_LOW;
789
      total_parasitic_cc2 += ONCHIP_VREG_LOSS_LOW;
790
      total_parasitic_cc3 += ONCHIP_VREG_LOSS_LOW;
791
  } else {
792
      total_parasitic_cc1 += ONCHIP_VREG_LOSS_HIGH;
793
      total_parasitic_cc2 += ONCHIP_VREG_LOSS_HIGH;
794
      total_parasitic_cc3 += ONCHIP_VREG_LOSS_HIGH;
795
  }
796

    
797
  max_cycle_power_cc1 = MAX(max_cycle_power_cc1,current_total_cycle_power_cc1);
798
  max_cycle_power_cc2 = MAX(max_cycle_power_cc2,current_total_cycle_power_cc2);
799
  max_cycle_power_cc3 = MAX(max_cycle_power_cc3,current_total_cycle_power_cc3);
800

    
801
  last_single_total_cycle_power_cc1 = total_cycle_power_cc1;
802
  last_single_total_cycle_power_cc2 = total_cycle_power_cc2;
803
  last_single_total_cycle_power_cc3 = total_cycle_power_cc3;
804

    
805
  cycle_count++;
806

    
807
  // here's where we change VFI levels
808
  diff_dispatch = sim_total_insn - last_sim_total_insn;
809
  diff_commit = sim_num_insn - last_sim_num_insn;
810
  
811
  diff_dispatch_sum += diff_dispatch;
812
  diff_commit_sum += diff_commit;
813

    
814
  hist_dispatch[hist_idx] = diff_dispatch;
815
  hist_commit[hist_idx] = diff_commit;
816
  hist_idx++;
817
  if(hist_idx >= SUM_OVER) {
818
    hist_idx = 0;
819
  }
820

    
821
  if(init_count >= SUM_OVER) {
822
      // Update speed
823
    speed_grade = speed_delay[SWITCH_CYCLES - 1];
824
    for (speed_idx = 0; speed_idx < SWITCH_CYCLES-1; speed_idx++) {
825

    
826
        speed_delay[speed_idx+1] = speed_delay[speed_idx];
827
    }
828

    
829
    diff_dispatch_sum -= hist_dispatch[hist_idx];
830
    diff_commit_sum -= hist_commit[hist_idx];
831

    
832
    if( diff_commit_sum < diff_dispatch_sum ) {
833
        speed_delay[0] = 0;
834
    }
835
    else if( diff_commit_sum >= diff_dispatch_sum ) {
836
        speed_delay[0] = 1;
837
    }
838

    
839
    if(speed_grade == 0) {
840
        slow_cycles++;
841
    }
842
    else {
843
        fast_cycles++;
844
    }
845

    
846
  } else {
847
    init_count++;
848
    fast_cycles++;
849

    
850
    for (speed_idx = 0; speed_idx < SWITCH_CYCLES; speed_idx++) {
851
        speed_delay[speed_idx] = 1;
852
    }
853
  }
854

    
855
//  if (diff_commit <= diff_dispatch) {
856
//      speed_grade = 0;
857
//  } else if (diff_commit > diff_dispatch) {
858
//      speed_grade = 1;
859
//  }
860

    
861
  if ((speed_grade == 0) && (last_speed_grade == 1)) {
862
      Mhz = Mhz / 2;
863
      Vdd = Vdd / 2;
864
      printf("Speed down!\n");
865
      last_switch_time = cycle_count;
866
  } else if ((speed_grade == 1) && (last_speed_grade == 0)) {
867
      Mhz = Mhz * 2;
868
      Vdd = Vdd * 2;
869
      printf("Speed up!\n");
870
      last_switch_time = cycle_count;
871
  }
872
#ifdef DVFS_FIX
873
  else if (last_switch_time < cycle_count-(SUM_OVER/3) && speed_grade==0 ) {
874
      speed_grade = 1;
875
      Mhz = Mhz * 2;
876
      Vdd = Vdd * 2;
877
      init_count = 0;
878
      last_switch_time = cycle_count;
879
      hist_idx = 0;
880
      diff_commit_sum = 0;
881
      diff_dispatch_sum = 0;
882
  }
883
#endif
884
      //printf("Vdd = %f, MHz = %f\n",Vdd,Mhz);
885

    
886
  if (speed_grade != last_speed_grade) {
887
    Period = 1/Mhz;
888
    SensePowerfactor3 = Mhz * Vbitsense * Vbitsense;
889
    SensePowerfactor2 = Mhz * (Vbitpre - Vbitsense) * (Vbitpre - Vbitsense);
890
    SensePowerfactor = (Mhz) * (Vdd/2) * (Vdd/2);
891
    Powerfactor = (Mhz) * (Vdd) * (Vdd);
892
    Sense2Powerfactor = Mhz * (2 * .3 + .1 * Vdd);
893
    LowSwingPowerfactor = Mhz * .2 * .2;
894
      calculate_power(&power);
895
  }
896

    
897
  last_speed_grade = speed_grade;
898

    
899
  // Update
900
  last_sim_num_insn  = sim_num_insn;
901
  last_sim_total_insn = sim_total_insn;
902

    
903
}
904

    
905
void
906
power_reg_stats(struct stat_sdb_t *sdb)        /* stats database */
907
{
908
  stat_reg_double(sdb, "rename_power", "total power usage of rename unit", &rename_power, 0, NULL);
909

    
910
  stat_reg_double(sdb, "bpred_power", "total power usage of bpred unit", &bpred_power, 0, NULL);
911

    
912
  stat_reg_double(sdb, "window_power", "total power usage of instruction window", &window_power, 0, NULL);
913

    
914
  stat_reg_double(sdb, "lsq_power", "total power usage of load/store queue", &lsq_power, 0, NULL);
915

    
916
  stat_reg_double(sdb, "regfile_power", "total power usage of arch. regfile", &regfile_power, 0, NULL);
917

    
918
  stat_reg_double(sdb, "icache_power", "total power usage of icache", &icache_power, 0, NULL);
919

    
920
  stat_reg_double(sdb, "dcache_power", "total power usage of dcache", &dcache_power, 0, NULL);
921

    
922
  stat_reg_double(sdb, "dcache2_power", "total power usage of dcache2", &dcache2_power, 0, NULL);
923

    
924
  stat_reg_double(sdb, "alu_power", "total power usage of alu", &alu_power, 0, NULL);
925

    
926
  stat_reg_double(sdb, "falu_power", "total power usage of falu", &falu_power, 0, NULL);
927

    
928
  stat_reg_double(sdb, "resultbus_power", "total power usage of resultbus", &resultbus_power, 0, NULL);
929

    
930
  stat_reg_double(sdb, "clock_power", "total power usage of clock", &clock_power, 0, NULL);
931

    
932
  stat_reg_formula(sdb, "avg_rename_power", "avg power usage of rename unit", "rename_power/sim_cycle", NULL);
933

    
934
  stat_reg_formula(sdb, "avg_bpred_power", "avg power usage of bpred unit", "bpred_power/sim_cycle", NULL);
935

    
936
  stat_reg_formula(sdb, "avg_window_power", "avg power usage of instruction window", "window_power/sim_cycle",  NULL);
937

    
938
  stat_reg_formula(sdb, "avg_lsq_power", "avg power usage of lsq", "lsq_power/sim_cycle",  NULL);
939

    
940
  stat_reg_formula(sdb, "avg_regfile_power", "avg power usage of arch. regfile", "regfile_power/sim_cycle",  NULL);
941

    
942
  stat_reg_formula(sdb, "avg_icache_power", "avg power usage of icache", "icache_power/sim_cycle",  NULL);
943

    
944
  stat_reg_formula(sdb, "avg_dcache_power", "avg power usage of dcache", "dcache_power/sim_cycle",  NULL);
945

    
946
  stat_reg_formula(sdb, "avg_dcache2_power", "avg power usage of dcache2", "dcache2_power/sim_cycle",  NULL);
947

    
948
  stat_reg_formula(sdb, "avg_alu_power", "avg power usage of alu", "alu_power/sim_cycle",  NULL);
949

    
950
  stat_reg_formula(sdb, "avg_falu_power", "avg power usage of falu", "falu_power/sim_cycle",  NULL);
951

    
952
  stat_reg_formula(sdb, "avg_resultbus_power", "avg power usage of resultbus", "resultbus_power/sim_cycle",  NULL);
953

    
954
  stat_reg_formula(sdb, "avg_clock_power", "avg power usage of clock", "clock_power/sim_cycle",  NULL);
955

    
956
  stat_reg_formula(sdb, "fetch_stage_power", "total power usage of fetch stage", "icache_power + bpred_power", NULL);
957

    
958
  stat_reg_formula(sdb, "dispatch_stage_power", "total power usage of dispatch stage", "rename_power", NULL);
959

    
960
  stat_reg_formula(sdb, "issue_stage_power", "total power usage of issue stage", "resultbus_power + alu_power + dcache_power + dcache2_power + window_power + lsq_power", NULL);
961

    
962
  stat_reg_formula(sdb, "avg_fetch_power", "average power of fetch unit per cycle", "(icache_power + bpred_power)/ sim_cycle", /* format */NULL);
963

    
964
  stat_reg_formula(sdb, "avg_dispatch_power", "average power of dispatch unit per cycle", "(rename_power)/ sim_cycle", /* format */NULL);
965

    
966
  stat_reg_formula(sdb, "avg_issue_power", "average power of issue unit per cycle", "(resultbus_power + alu_power + dcache_power + dcache2_power + window_power + lsq_power)/ sim_cycle", /* format */NULL);
967

    
968
  stat_reg_formula(sdb, "total_power", "total power per cycle","(rename_power + bpred_power + window_power + lsq_power + regfile_power + icache_power  + resultbus_power + clock_power + alu_power + dcache_power + dcache2_power)", NULL);
969

    
970
  stat_reg_formula(sdb, "avg_total_power_cycle", "average total power per cycle","(rename_power + bpred_power + window_power + lsq_power + regfile_power + icache_power + resultbus_power + clock_power + alu_power + dcache_power + dcache2_power)/sim_cycle", NULL);
971

    
972
  stat_reg_formula(sdb, "avg_total_power_cycle_nofp_nod2", "average total power per cycle","(rename_power + bpred_power + window_power + lsq_power + regfile_power + icache_power + resultbus_power + clock_power + alu_power + dcache_power - falu_power )/sim_cycle", NULL);
973

    
974
  stat_reg_formula(sdb, "avg_total_power_insn", "average total power per insn","(rename_power + bpred_power + window_power + lsq_power + regfile_power + icache_power + resultbus_power + clock_power + alu_power + dcache_power + dcache2_power)/sim_total_insn", NULL);
975

    
976
  stat_reg_formula(sdb, "avg_total_power_insn_nofp_nod2", "average total power per insn","(rename_power + bpred_power + window_power + lsq_power + regfile_power + icache_power + resultbus_power + clock_power + alu_power + dcache_power - falu_power )/sim_total_insn", NULL);
977

    
978
  stat_reg_double(sdb, "rename_power_cc1", "total power usage of rename unit_cc1", &rename_power_cc1, 0, NULL);
979

    
980
  stat_reg_double(sdb, "bpred_power_cc1", "total power usage of bpred unit_cc1", &bpred_power_cc1, 0, NULL);
981

    
982
  stat_reg_double(sdb, "window_power_cc1", "total power usage of instruction window_cc1", &window_power_cc1, 0, NULL);
983

    
984
  stat_reg_double(sdb, "lsq_power_cc1", "total power usage of lsq_cc1", &lsq_power_cc1, 0, NULL);
985

    
986
  stat_reg_double(sdb, "regfile_power_cc1", "total power usage of arch. regfile_cc1", &regfile_power_cc1, 0, NULL);
987

    
988
  stat_reg_double(sdb, "icache_power_cc1", "total power usage of icache_cc1", &icache_power_cc1, 0, NULL);
989

    
990
  stat_reg_double(sdb, "dcache_power_cc1", "total power usage of dcache_cc1", &dcache_power_cc1, 0, NULL);
991

    
992
  stat_reg_double(sdb, "dcache2_power_cc1", "total power usage of dcache2_cc1", &dcache2_power_cc1, 0, NULL);
993

    
994
  stat_reg_double(sdb, "alu_power_cc1", "total power usage of alu_cc1", &alu_power_cc1, 0, NULL);
995

    
996
  stat_reg_double(sdb, "resultbus_power_cc1", "total power usage of resultbus_cc1", &resultbus_power_cc1, 0, NULL);
997

    
998
  stat_reg_double(sdb, "clock_power_cc1", "total power usage of clock_cc1", &clock_power_cc1, 0, NULL);
999

    
1000
  stat_reg_formula(sdb, "avg_rename_power_cc1", "avg power usage of rename unit_cc1", "rename_power_cc1/sim_cycle", NULL);
1001

    
1002
  stat_reg_formula(sdb, "avg_bpred_power_cc1", "avg power usage of bpred unit_cc1", "bpred_power_cc1/sim_cycle", NULL);
1003

    
1004
  stat_reg_formula(sdb, "avg_window_power_cc1", "avg power usage of instruction window_cc1", "window_power_cc1/sim_cycle",  NULL);
1005

    
1006
  stat_reg_formula(sdb, "avg_lsq_power_cc1", "avg power usage of lsq_cc1", "lsq_power_cc1/sim_cycle",  NULL);
1007

    
1008
  stat_reg_formula(sdb, "avg_regfile_power_cc1", "avg power usage of arch. regfile_cc1", "regfile_power_cc1/sim_cycle",  NULL);
1009

    
1010
  stat_reg_formula(sdb, "avg_icache_power_cc1", "avg power usage of icache_cc1", "icache_power_cc1/sim_cycle",  NULL);
1011

    
1012
  stat_reg_formula(sdb, "avg_dcache_power_cc1", "avg power usage of dcache_cc1", "dcache_power_cc1/sim_cycle",  NULL);
1013

    
1014
  stat_reg_formula(sdb, "avg_dcache2_power_cc1", "avg power usage of dcache2_cc1", "dcache2_power_cc1/sim_cycle",  NULL);
1015

    
1016
  stat_reg_formula(sdb, "avg_alu_power_cc1", "avg power usage of alu_cc1", "alu_power_cc1/sim_cycle",  NULL);
1017

    
1018
  stat_reg_formula(sdb, "avg_resultbus_power_cc1", "avg power usage of resultbus_cc1", "resultbus_power_cc1/sim_cycle",  NULL);
1019

    
1020
  stat_reg_formula(sdb, "avg_clock_power_cc1", "avg power usage of clock_cc1", "clock_power_cc1/sim_cycle",  NULL);
1021

    
1022
  stat_reg_formula(sdb, "fetch_stage_power_cc1", "total power usage of fetch stage_cc1", "icache_power_cc1 + bpred_power_cc1", NULL);
1023

    
1024
  stat_reg_formula(sdb, "dispatch_stage_power_cc1", "total power usage of dispatch stage_cc1", "rename_power_cc1", NULL);
1025

    
1026
  stat_reg_formula(sdb, "issue_stage_power_cc1", "total power usage of issue stage_cc1", "resultbus_power_cc1 + alu_power_cc1 + dcache_power_cc1 + dcache2_power_cc1 + lsq_power_cc1 + window_power_cc1", NULL);
1027

    
1028
  stat_reg_formula(sdb, "avg_fetch_power_cc1", "average power of fetch unit per cycle_cc1", "(icache_power_cc1 + bpred_power_cc1)/ sim_cycle", /* format */NULL);
1029

    
1030
  stat_reg_formula(sdb, "avg_dispatch_power_cc1", "average power of dispatch unit per cycle_cc1", "(rename_power_cc1)/ sim_cycle", /* format */NULL);
1031

    
1032
  stat_reg_formula(sdb, "avg_issue_power_cc1", "average power of issue unit per cycle_cc1", "(resultbus_power_cc1 + alu_power_cc1 + dcache_power_cc1 + dcache2_power_cc1 + lsq_power_cc1 + window_power_cc1)/ sim_cycle", /* format */NULL);
1033

    
1034
  stat_reg_formula(sdb, "total_power_cycle_cc1", "total power per cycle_cc1","(rename_power_cc1 + bpred_power_cc1 + lsq_power_cc1 + window_power_cc1 + regfile_power_cc1 + icache_power_cc1 + resultbus_power_cc1 + clock_power_cc1 + alu_power_cc1 + dcache_power_cc1 + dcache2_power_cc1)", NULL);
1035

    
1036
  stat_reg_formula(sdb, "avg_total_power_cycle_cc1", "average total power per cycle_cc1","(rename_power_cc1 + bpred_power_cc1 + lsq_power_cc1 + window_power_cc1 + regfile_power_cc1 + icache_power_cc1 + resultbus_power_cc1 + clock_power_cc1 + alu_power_cc1 + dcache_power_cc1 +dcache2_power_cc1)/sim_cycle", NULL);
1037

    
1038
  stat_reg_formula(sdb, "avg_total_power_insn_cc1", "average total power per insn_cc1","(rename_power_cc1 + bpred_power_cc1 + lsq_power_cc1 + window_power_cc1 + regfile_power_cc1 + icache_power_cc1 + resultbus_power_cc1 + clock_power_cc1 +  alu_power_cc1 + dcache_power_cc1 + dcache2_power_cc1)/sim_total_insn", NULL);
1039

    
1040
  stat_reg_double(sdb, "rename_power_cc2", "total power usage of rename unit_cc2", &rename_power_cc2, 0, NULL);
1041

    
1042
  stat_reg_double(sdb, "bpred_power_cc2", "total power usage of bpred unit_cc2", &bpred_power_cc2, 0, NULL);
1043

    
1044
  stat_reg_double(sdb, "window_power_cc2", "total power usage of instruction window_cc2", &window_power_cc2, 0, NULL);
1045

    
1046
  stat_reg_double(sdb, "lsq_power_cc2", "total power usage of lsq_cc2", &lsq_power_cc2, 0, NULL);
1047

    
1048
  stat_reg_double(sdb, "regfile_power_cc2", "total power usage of arch. regfile_cc2", &regfile_power_cc2, 0, NULL);
1049

    
1050
  stat_reg_double(sdb, "icache_power_cc2", "total power usage of icache_cc2", &icache_power_cc2, 0, NULL);
1051

    
1052
  stat_reg_double(sdb, "dcache_power_cc2", "total power usage of dcache_cc2", &dcache_power_cc2, 0, NULL);
1053

    
1054
  stat_reg_double(sdb, "dcache2_power_cc2", "total power usage of dcache2_cc2", &dcache2_power_cc2, 0, NULL);
1055

    
1056
  stat_reg_double(sdb, "alu_power_cc2", "total power usage of alu_cc2", &alu_power_cc2, 0, NULL);
1057

    
1058
  stat_reg_double(sdb, "resultbus_power_cc2", "total power usage of resultbus_cc2", &resultbus_power_cc2, 0, NULL);
1059

    
1060
  stat_reg_double(sdb, "clock_power_cc2", "total power usage of clock_cc2", &clock_power_cc2, 0, NULL);
1061

    
1062
  stat_reg_formula(sdb, "avg_rename_power_cc2", "avg power usage of rename unit_cc2", "rename_power_cc2/sim_cycle", NULL);
1063

    
1064
  stat_reg_formula(sdb, "avg_bpred_power_cc2", "avg power usage of bpred unit_cc2", "bpred_power_cc2/sim_cycle", NULL);
1065

    
1066
  stat_reg_formula(sdb, "avg_window_power_cc2", "avg power usage of instruction window_cc2", "window_power_cc2/sim_cycle",  NULL);
1067

    
1068
  stat_reg_formula(sdb, "avg_lsq_power_cc2", "avg power usage of instruction lsq_cc2", "lsq_power_cc2/sim_cycle",  NULL);
1069

    
1070
  stat_reg_formula(sdb, "avg_regfile_power_cc2", "avg power usage of arch. regfile_cc2", "regfile_power_cc2/sim_cycle",  NULL);
1071

    
1072
  stat_reg_formula(sdb, "avg_icache_power_cc2", "avg power usage of icache_cc2", "icache_power_cc2/sim_cycle",  NULL);
1073

    
1074
  stat_reg_formula(sdb, "avg_dcache_power_cc2", "avg power usage of dcache_cc2", "dcache_power_cc2/sim_cycle",  NULL);
1075

    
1076
  stat_reg_formula(sdb, "avg_dcache2_power_cc2", "avg power usage of dcache2_cc2", "dcache2_power_cc2/sim_cycle",  NULL);
1077

    
1078
  stat_reg_formula(sdb, "avg_alu_power_cc2", "avg power usage of alu_cc2", "alu_power_cc2/sim_cycle",  NULL);
1079

    
1080
  stat_reg_formula(sdb, "avg_resultbus_power_cc2", "avg power usage of resultbus_cc2", "resultbus_power_cc2/sim_cycle",  NULL);
1081

    
1082
  stat_reg_formula(sdb, "avg_clock_power_cc2", "avg power usage of clock_cc2", "clock_power_cc2/sim_cycle",  NULL);
1083

    
1084
  stat_reg_formula(sdb, "fetch_stage_power_cc2", "total power usage of fetch stage_cc2", "icache_power_cc2 + bpred_power_cc2", NULL);
1085

    
1086
  stat_reg_formula(sdb, "dispatch_stage_power_cc2", "total power usage of dispatch stage_cc2", "rename_power_cc2", NULL);
1087

    
1088
  stat_reg_formula(sdb, "issue_stage_power_cc2", "total power usage of issue stage_cc2", "resultbus_power_cc2 + alu_power_cc2 + dcache_power_cc2 + dcache2_power_cc2 + lsq_power_cc2 + window_power_cc2", NULL);
1089

    
1090
  stat_reg_formula(sdb, "avg_fetch_power_cc2", "average power of fetch unit per cycle_cc2", "(icache_power_cc2 + bpred_power_cc2)/ sim_cycle", /* format */NULL);
1091

    
1092
  stat_reg_formula(sdb, "avg_dispatch_power_cc2", "average power of dispatch unit per cycle_cc2", "(rename_power_cc2)/ sim_cycle", /* format */NULL);
1093

    
1094
  stat_reg_formula(sdb, "avg_issue_power_cc2", "average power of issue unit per cycle_cc2", "(resultbus_power_cc2 + alu_power_cc2 + dcache_power_cc2 + dcache2_power_cc2 + lsq_power_cc2 + window_power_cc2)/ sim_cycle", /* format */NULL);
1095

    
1096
  stat_reg_formula(sdb, "total_power_cycle_cc2", "total power per cycle_cc2","(rename_power_cc2 + bpred_power_cc2 + lsq_power_cc2 + window_power_cc2 + regfile_power_cc2 + icache_power_cc2 + resultbus_power_cc2 + clock_power_cc2 + alu_power_cc2 + dcache_power_cc2 + dcache2_power_cc2)", NULL);
1097

    
1098
  stat_reg_formula(sdb, "avg_total_power_cycle_cc2", "average total power per cycle_cc2","(rename_power_cc2 + bpred_power_cc2 + lsq_power_cc2 + window_power_cc2 + regfile_power_cc2 + icache_power_cc2 + resultbus_power_cc2 + clock_power_cc2 + alu_power_cc2 + dcache_power_cc2 + dcache2_power_cc2)/sim_cycle", NULL);
1099

    
1100
  stat_reg_formula(sdb, "avg_total_power_insn_cc2", "average total power per insn_cc2","(rename_power_cc2 + bpred_power_cc2 + lsq_power_cc2 + window_power_cc2 + regfile_power_cc2 + icache_power_cc2 + resultbus_power_cc2 + clock_power_cc2 + alu_power_cc2 + dcache_power_cc2 + dcache2_power_cc2)/sim_total_insn", NULL);
1101

    
1102
  stat_reg_double(sdb, "rename_power_cc3", "total power usage of rename unit_cc3", &rename_power_cc3, 0, NULL);
1103

    
1104
  stat_reg_double(sdb, "bpred_power_cc3", "total power usage of bpred unit_cc3", &bpred_power_cc3, 0, NULL);
1105

    
1106
  stat_reg_double(sdb, "window_power_cc3", "total power usage of instruction window_cc3", &window_power_cc3, 0, NULL);
1107

    
1108
  stat_reg_double(sdb, "lsq_power_cc3", "total power usage of lsq_cc3", &lsq_power_cc3, 0, NULL);
1109

    
1110
  stat_reg_double(sdb, "regfile_power_cc3", "total power usage of arch. regfile_cc3", &regfile_power_cc3, 0, NULL);
1111

    
1112
  stat_reg_double(sdb, "icache_power_cc3", "total power usage of icache_cc3", &icache_power_cc3, 0, NULL);
1113

    
1114
  stat_reg_double(sdb, "dcache_power_cc3", "total power usage of dcache_cc3", &dcache_power_cc3, 0, NULL);
1115

    
1116
  stat_reg_double(sdb, "dcache2_power_cc3", "total power usage of dcache2_cc3", &dcache2_power_cc3, 0, NULL);
1117

    
1118
  stat_reg_double(sdb, "alu_power_cc3", "total power usage of alu_cc3", &alu_power_cc3, 0, NULL);
1119

    
1120
  stat_reg_double(sdb, "resultbus_power_cc3", "total power usage of resultbus_cc3", &resultbus_power_cc3, 0, NULL);
1121

    
1122
  stat_reg_double(sdb, "clock_power_cc3", "total power usage of clock_cc3", &clock_power_cc3, 0, NULL);
1123

    
1124
  stat_reg_formula(sdb, "avg_rename_power_cc3", "avg power usage of rename unit_cc3", "rename_power_cc3/sim_cycle", NULL);
1125

    
1126
  stat_reg_formula(sdb, "avg_bpred_power_cc3", "avg power usage of bpred unit_cc3", "bpred_power_cc3/sim_cycle", NULL);
1127

    
1128
  stat_reg_formula(sdb, "avg_window_power_cc3", "avg power usage of instruction window_cc3", "window_power_cc3/sim_cycle",  NULL);
1129

    
1130
  stat_reg_formula(sdb, "avg_lsq_power_cc3", "avg power usage of instruction lsq_cc3", "lsq_power_cc3/sim_cycle",  NULL);
1131

    
1132
  stat_reg_formula(sdb, "avg_regfile_power_cc3", "avg power usage of arch. regfile_cc3", "regfile_power_cc3/sim_cycle",  NULL);
1133

    
1134
  stat_reg_formula(sdb, "avg_icache_power_cc3", "avg power usage of icache_cc3", "icache_power_cc3/sim_cycle",  NULL);
1135

    
1136
  stat_reg_formula(sdb, "avg_dcache_power_cc3", "avg power usage of dcache_cc3", "dcache_power_cc3/sim_cycle",  NULL);
1137

    
1138
  stat_reg_formula(sdb, "avg_dcache2_power_cc3", "avg power usage of dcache2_cc3", "dcache2_power_cc3/sim_cycle",  NULL);
1139

    
1140
  stat_reg_formula(sdb, "avg_alu_power_cc3", "avg power usage of alu_cc3", "alu_power_cc3/sim_cycle",  NULL);
1141

    
1142
  stat_reg_formula(sdb, "avg_resultbus_power_cc3", "avg power usage of resultbus_cc3", "resultbus_power_cc3/sim_cycle",  NULL);
1143

    
1144
  stat_reg_formula(sdb, "avg_clock_power_cc3", "avg power usage of clock_cc3", "clock_power_cc3/sim_cycle",  NULL);
1145

    
1146
  stat_reg_formula(sdb, "fetch_stage_power_cc3", "total power usage of fetch stage_cc3", "icache_power_cc3 + bpred_power_cc3", NULL);
1147

    
1148
  stat_reg_formula(sdb, "dispatch_stage_power_cc3", "total power usage of dispatch stage_cc3", "rename_power_cc3", NULL);
1149

    
1150
  stat_reg_formula(sdb, "issue_stage_power_cc3", "total power usage of issue stage_cc3", "resultbus_power_cc3 + alu_power_cc3 + dcache_power_cc3 + dcache2_power_cc3 + lsq_power_cc3 + window_power_cc3", NULL);
1151

    
1152
  stat_reg_formula(sdb, "avg_fetch_power_cc3", "average power of fetch unit per cycle_cc3", "(icache_power_cc3 + bpred_power_cc3)/ sim_cycle", /* format */NULL);
1153

    
1154
  stat_reg_formula(sdb, "avg_dispatch_power_cc3", "average power of dispatch unit per cycle_cc3", "(rename_power_cc3)/ sim_cycle", /* format */NULL);
1155

    
1156
  stat_reg_formula(sdb, "avg_issue_power_cc3", "average power of issue unit per cycle_cc3", "(resultbus_power_cc3 + alu_power_cc3 + dcache_power_cc3 + dcache2_power_cc3 + lsq_power_cc3 + window_power_cc3)/ sim_cycle", /* format */NULL);
1157

    
1158
  stat_reg_formula(sdb, "total_power_cycle_cc3", "total power per cycle_cc3","(rename_power_cc3 + bpred_power_cc3 + lsq_power_cc3 + window_power_cc3 + regfile_power_cc3 + icache_power_cc3 + resultbus_power_cc3 + clock_power_cc3 + alu_power_cc3 + dcache_power_cc3 + dcache2_power_cc3)", NULL);
1159

    
1160
  stat_reg_formula(sdb, "avg_total_power_cycle_cc3", "average total power per cycle_cc3","(rename_power_cc3 + bpred_power_cc3 + lsq_power_cc3 + window_power_cc3 + regfile_power_cc3 + icache_power_cc3 + resultbus_power_cc3 + clock_power_cc3 + alu_power_cc3 + dcache_power_cc3 + dcache2_power_cc3)/sim_cycle", NULL);
1161

    
1162
  stat_reg_formula(sdb, "avg_total_power_insn_cc3", "average total power per insn_cc3","(rename_power_cc3 + bpred_power_cc3 + lsq_power_cc3 + window_power_cc3 + regfile_power_cc3 + icache_power_cc3 + resultbus_power_cc3 + clock_power_cc3 + alu_power_cc3 + dcache_power_cc3 + dcache2_power_cc3)/sim_total_insn", NULL);
1163

    
1164
  stat_reg_counter(sdb, "total_rename_access", "total number accesses of rename unit", &total_rename_access, 0, NULL);
1165

    
1166
  stat_reg_counter(sdb, "total_bpred_access", "total number accesses of bpred unit", &total_bpred_access, 0, NULL);
1167

    
1168
  stat_reg_counter(sdb, "total_window_access", "total number accesses of instruction window", &total_window_access, 0, NULL);
1169

    
1170
  stat_reg_counter(sdb, "total_lsq_access", "total number accesses of load/store queue", &total_lsq_access, 0, NULL);
1171

    
1172
  stat_reg_counter(sdb, "total_regfile_access", "total number accesses of arch. regfile", &total_regfile_access, 0, NULL);
1173

    
1174
  stat_reg_counter(sdb, "total_icache_access", "total number accesses of icache", &total_icache_access, 0, NULL);
1175

    
1176
  stat_reg_counter(sdb, "total_dcache_access", "total number accesses of dcache", &total_dcache_access, 0, NULL);
1177

    
1178
  stat_reg_counter(sdb, "total_dcache2_access", "total number accesses of dcache2", &total_dcache2_access, 0, NULL);
1179

    
1180
  stat_reg_counter(sdb, "total_alu_access", "total number accesses of alu", &total_alu_access, 0, NULL);
1181

    
1182
  stat_reg_counter(sdb, "total_resultbus_access", "total number accesses of resultbus", &total_resultbus_access, 0, NULL);
1183

    
1184
  stat_reg_formula(sdb, "avg_rename_access", "avg number accesses of rename unit", "total_rename_access/sim_cycle", NULL);
1185

    
1186
  stat_reg_formula(sdb, "avg_bpred_access", "avg number accesses of bpred unit", "total_bpred_access/sim_cycle", NULL);
1187

    
1188
  stat_reg_formula(sdb, "avg_window_access", "avg number accesses of instruction window", "total_window_access/sim_cycle",  NULL);
1189

    
1190
  stat_reg_formula(sdb, "avg_lsq_access", "avg number accesses of lsq", "total_lsq_access/sim_cycle",  NULL);
1191

    
1192
  stat_reg_formula(sdb, "avg_regfile_access", "avg number accesses of arch. regfile", "total_regfile_access/sim_cycle",  NULL);
1193

    
1194
  stat_reg_formula(sdb, "avg_icache_access", "avg number accesses of icache", "total_icache_access/sim_cycle",  NULL);
1195

    
1196
  stat_reg_formula(sdb, "avg_dcache_access", "avg number accesses of dcache", "total_dcache_access/sim_cycle",  NULL);
1197

    
1198
  stat_reg_formula(sdb, "avg_dcache2_access", "avg number accesses of dcache2", "total_dcache2_access/sim_cycle",  NULL);
1199

    
1200
  stat_reg_formula(sdb, "avg_alu_access", "avg number accesses of alu", "total_alu_access/sim_cycle",  NULL);
1201

    
1202
  stat_reg_formula(sdb, "avg_resultbus_access", "avg number accesses of resultbus", "total_resultbus_access/sim_cycle",  NULL);
1203

    
1204
  stat_reg_counter(sdb, "max_rename_access", "max number accesses of rename unit", &max_rename_access, 0, NULL);
1205

    
1206
  stat_reg_counter(sdb, "max_bpred_access", "max number accesses of bpred unit", &max_bpred_access, 0, NULL);
1207

    
1208
  stat_reg_counter(sdb, "max_window_access", "max number accesses of instruction window", &max_window_access, 0, NULL);
1209

    
1210
  stat_reg_counter(sdb, "max_lsq_access", "max number accesses of load/store queue", &max_lsq_access, 0, NULL);
1211

    
1212
  stat_reg_counter(sdb, "max_regfile_access", "max number accesses of arch. regfile", &max_regfile_access, 0, NULL);
1213

    
1214
  stat_reg_counter(sdb, "max_icache_access", "max number accesses of icache", &max_icache_access, 0, NULL);
1215

    
1216
  stat_reg_counter(sdb, "max_dcache_access", "max number accesses of dcache", &max_dcache_access, 0, NULL);
1217

    
1218
  stat_reg_counter(sdb, "max_dcache2_access", "max number accesses of dcache2", &max_dcache2_access, 0, NULL);
1219

    
1220
  stat_reg_counter(sdb, "max_alu_access", "max number accesses of alu", &max_alu_access, 0, NULL);
1221

    
1222
  stat_reg_counter(sdb, "max_resultbus_access", "max number accesses of resultbus", &max_resultbus_access, 0, NULL);
1223

    
1224
  stat_reg_double(sdb, "max_cycle_power_cc1", "maximum cycle power usage of cc1", &max_cycle_power_cc1, 0, NULL);
1225

    
1226
  stat_reg_double(sdb, "max_cycle_power_cc2", "maximum cycle power usage of cc2", &max_cycle_power_cc2, 0, NULL);
1227

    
1228
  stat_reg_double(sdb, "max_cycle_power_cc3", "maximum cycle power usage of cc3", &max_cycle_power_cc3, 0, NULL);
1229

    
1230
  stat_reg_double(sdb, "parasitic_power_cc1", "parasitic power cc1", &total_parasitic_cc1, 0, NULL);
1231
  stat_reg_double(sdb, "parasitic_power_cc2", "parasitic power cc2", &total_parasitic_cc2, 0, NULL);
1232
  stat_reg_double(sdb, "parasitic_power_cc3", "parasitic power cc3", &total_parasitic_cc3, 0, NULL);
1233
  stat_reg_double(sdb, "min amperage", "min amperage", &min_amp, 0, NULL);
1234
  stat_reg_double(sdb, "max amperage", "max amperage", &max_amp, 0, NULL);
1235
  stat_reg_double(sdb, "slow_cycles", "slow cycles", &slow_cycles, 0, NULL);
1236
  stat_reg_double(sdb, "fast_cycles", "fast cycles", &fast_cycles, 0, NULL);
1237
}
1238

    
1239

    
1240
/* This routine takes the number of rows and cols of an array structure
   and attempts to make it more of a reasonable circuit structure by
   bringing the number of rows and cols as close together as possible
   (scaling both by factors of 2 in opposite directions).

   rows, cols: dimensions of the array.

   Returns a scale factor (a power of two): the amount that the rows
   should be divided by and the columns should be multiplied by.
   Returns 1 when rows <= cols (only the rows > cols direction is
   handled) and when cols is negative (no meaningful scaling exists;
   previously such input never terminated).

   The factor is built with an integer shift rather than a
   floating-point pow(), and the doubled column count is held in a
   long long so that cols close to INT_MAX cannot overflow.
*/
int squarify(int rows, int cols)
{
  long long wide_cols = cols;   /* doubled each step; at most ~2^32 */
  int shift = 0;                /* number of halve/double steps taken */

  if (rows <= cols || cols < 0)
    return 1;

  /* Here rows > cols >= 0, so rows >= 1.  Each pass halves rows, so the
     test below must succeed within 30 passes and the shift stays in
     range for an int. */
  for (;;) {
    rows /= 2;
    wide_cols *= 2;
    shift++;

    /* stop once one more halving would bring rows to or below cols */
    if (rows / 2 <= wide_cols)
      return 1 << shift;
  }
}
1276

    
1277
/* could improve squarify to work when rows < cols */
1278

    
1279
/* Variant of squarify() that works in both directions.

   rows, cols: dimensions of the array.

   Repeatedly halves the larger dimension and doubles the smaller one
   until they meet or cross, and returns a power of two as a double:
   greater than or equal to 1 when rows started larger, less than or
   equal to 1 when cols started larger.

   NOTE(review): the step on which the dimensions meet or cross is not
   counted, so e.g. (4, 1) yields 1.0 and (16, 1) yields 2.0.  This
   differs from squarify(), which counts that step; the results are
   kept as they were -- confirm against the callers before changing.

   Returns 1.0 when rows == cols, and also when either dimension is
   negative (previously unequal input with a negative dimension never
   terminated).  The working copies are long long so that doubling a
   value close to INT_MAX cannot overflow.
*/
double squarify_new(int rows, int cols)
{
  long long r = rows;
  long long c = cols;
  double factor = 1.0;   /* exact power of two, updated by *2 or *0.5 */

  if (r == c || r < 0 || c < 0)
    return 1.0;

  if (r > c) {
    /* more rows than columns: fold rows into columns */
    for (;;) {
      r /= 2;
      c *= 2;
      if (r <= c)
        return factor;
      factor *= 2.0;
    }
  }

  /* more columns than rows: fold columns into rows */
  for (;;) {
    r *= 2;
    c /= 2;
    if (c <= r)
      return factor;
    factor *= 0.5;
  }
}
1305

    
1306
void dump_power_stats(power)
1307
     power_result_type *power;
1308
{
1309
  double total_power;
1310
  double bpred_power;
1311
  double rename_power;
1312
  double rat_power;
1313
  double dcl_power;
1314
  double lsq_power;
1315
  double window_power;
1316
  double wakeup_power;
1317
  double rs_power;
1318
  double lsq_wakeup_power;
1319
  double lsq_rs_power;
1320
  double regfile_power;
1321
  double reorder_power;
1322
  double icache_power;
1323
  double dcache_power;
1324
  double dcache2_power;
1325
  double dtlb_power;
1326
  double itlb_power;
1327
  double ambient_power = 2.0;
1328

    
1329
  icache_power = power->icache_power;
1330

    
1331
  dcache_power = power->dcache_power;
1332

    
1333
  dcache2_power = power->dcache2_power;
1334

    
1335
  itlb_power = power->itlb;
1336
  dtlb_power = power->dtlb;
1337

    
1338
  bpred_power = power->btb + power->local_predict + power->global_predict + 
1339
    power->chooser + power->ras;
1340

    
1341
  rat_power = power->rat_decoder + 
1342
    power->rat_wordline + power->rat_bitline + power->rat_senseamp;
1343

    
1344
  dcl_power = power->dcl_compare + power->dcl_pencode;
1345

    
1346
  rename_power = power->rat_power + power->dcl_power + power->inst_decoder_power;
1347

    
1348
  wakeup_power = power->wakeup_tagdrive + power->wakeup_tagmatch + 
1349
    power->wakeup_ormatch;
1350
   
1351
  rs_power = power->rs_decoder + 
1352
    power->rs_wordline + power->rs_bitline + power->rs_senseamp;
1353

    
1354
  window_power = wakeup_power + rs_power + power->selection;
1355

    
1356
  lsq_rs_power = power->lsq_rs_decoder + 
1357
    power->lsq_rs_wordline + power->lsq_rs_bitline + power->lsq_rs_senseamp;
1358

    
1359
  lsq_wakeup_power = power->lsq_wakeup_tagdrive + 
1360
    power->lsq_wakeup_tagmatch + power->lsq_wakeup_ormatch;
1361

    
1362
  lsq_power = lsq_wakeup_power + lsq_rs_power;
1363

    
1364
  reorder_power = power->reorder_decoder + 
1365
    power->reorder_wordline + power->reorder_bitline + 
1366
    power->reorder_senseamp;
1367

    
1368
  regfile_power = power->regfile_decoder + 
1369
    power->regfile_wordline + power->regfile_bitline + 
1370
    power->regfile_senseamp;
1371

    
1372
  total_power = bpred_power + rename_power + window_power + regfile_power +
1373
    power->resultbus + lsq_power + 
1374
    icache_power + dcache_power + dcache2_power + 
1375
    dtlb_power + itlb_power + power->clock_power + power->ialu_power +
1376
    power->falu_power;
1377

    
1378
  fprintf(stderr,"\nProcessor Parameters:\n");
1379
  fprintf(stderr,"Issue Width: %d\n",ruu_issue_width);
1380
  fprintf(stderr,"Window Size: %d\n",RUU_size);
1381
  fprintf(stderr,"Number of Virtual Registers: %d\n",MD_NUM_IREGS);
1382
  fprintf(stderr,"Number of Physical Registers: %d\n",RUU_size);
1383
  fprintf(stderr,"Datapath Width: %d\n",data_width);
1384

    
1385
  fprintf(stderr,"Total Power Consumption: %g\n",total_power+ambient_power);
1386
  fprintf(stderr,"Branch Predictor Power Consumption: %g  (%.3g%%)\n",bpred_power,100*bpred_power/total_power);
1387
  fprintf(stderr," branch target buffer power (W): %g\n",power->btb);
1388
  fprintf(stderr," local predict power (W): %g\n",power->local_predict);
1389
  fprintf(stderr," global predict power (W): %g\n",power->global_predict);
1390
  fprintf(stderr," chooser power (W): %g\n",power->chooser);
1391
  fprintf(stderr," RAS power (W): %g\n",power->ras);
1392
  fprintf(stderr,"Rename Logic Power Consumption: %g  (%.3g%%)\n",rename_power,100*rename_power/total_power);
1393
  fprintf(stderr," Instruction Decode Power (W): %g\n",power->inst_decoder_power);
1394
  fprintf(stderr," RAT decode_power (W): %g\n",power->rat_decoder);
1395
  fprintf(stderr," RAT wordline_power (W): %g\n",power->rat_wordline);
1396
  fprintf(stderr," RAT bitline_power (W): %g\n",power->rat_bitline);
1397
  fprintf(stderr," DCL Comparators (W): %g\n",power->dcl_compare);
1398
  fprintf(stderr,"Instruction Window Power Consumption: %g  (%.3g%%)\n",window_power,100*window_power/total_power);
1399
  fprintf(stderr," tagdrive (W): %g\n",power->wakeup_tagdrive);
1400
  fprintf(stderr," tagmatch (W): %g\n",power->wakeup_tagmatch);
1401
  fprintf(stderr," Selection Logic (W): %g\n",power->selection);
1402
  fprintf(stderr," decode_power (W): %g\n",power->rs_decoder);
1403
  fprintf(stderr," wordline_power (W): %g\n",power->rs_wordline);
1404
  fprintf(stderr," bitline_power (W): %g\n",power->rs_bitline);
1405
  fprintf(stderr,"Load/Store Queue Power Consumption: %g  (%.3g%%)\n",lsq_power,100*lsq_power/total_power);
1406
  fprintf(stderr," tagdrive (W): %g\n",power->lsq_wakeup_tagdrive);
1407
  fprintf(stderr," tagmatch (W): %g\n",power->lsq_wakeup_tagmatch);
1408
  fprintf(stderr," decode_power (W): %g\n",power->lsq_rs_decoder);
1409
  fprintf(stderr," wordline_power (W): %g\n",power->lsq_rs_wordline);
1410
  fprintf(stderr," bitline_power (W): %g\n",power->lsq_rs_bitline);
1411
  fprintf(stderr,"Arch. Register File Power Consumption: %g  (%.3g%%)\n",regfile_power,100*regfile_power/total_power);
1412
  fprintf(stderr," decode_power (W): %g\n",power->regfile_decoder);
1413
  fprintf(stderr," wordline_power (W): %g\n",power->regfile_wordline);
1414
  fprintf(stderr," bitline_power (W): %g\n",power->regfile_bitline);
1415
  fprintf(stderr,"Result Bus Power Consumption: %g  (%.3g%%)\n",power->resultbus,100*power->resultbus/total_power);
1416
  fprintf(stderr,"Total Clock Power: %g  (%.3g%%)\n",power->clock_power,100*power->clock_power/total_power);
1417
  fprintf(stderr,"Int ALU Power: %g  (%.3g%%)\n",power->ialu_power,100*power->ialu_power/total_power);
1418
  fprintf(stderr,"FP ALU Power: %g  (%.3g%%)\n",power->falu_power,100*power->falu_power/total_power);
1419
  fprintf(stderr,"Instruction Cache Power Consumption: %g  (%.3g%%)\n",icache_power,100*icache_power/total_power);
1420
  fprintf(stderr," decode_power (W): %g\n",power->icache_decoder);
1421
  fprintf(stderr," wordline_power (W): %g\n",power->icache_wordline);
1422
  fprintf(stderr," bitline_power (W): %g\n",power->icache_bitline);
1423
  fprintf(stderr," senseamp_power (W): %g\n",power->icache_senseamp);
1424
  fprintf(stderr," tagarray_power (W): %g\n",power->icache_tagarray);
1425
  fprintf(stderr,"Itlb_power (W): %g (%.3g%%)\n",power->itlb,100*power->itlb/total_power);
1426
  fprintf(stderr,"Data Cache Power Consumption: %g  (%.3g%%)\n",dcache_power,100*dcache_power/total_power);
1427
  fprintf(stderr," decode_power (W): %g\n",power->dcache_decoder);
1428
  fprintf(stderr," wordline_power (W): %g\n",power->dcache_wordline);
1429
  fprintf(stderr," bitline_power (W): %g\n",power->dcache_bitline);
1430
  fprintf(stderr," senseamp_power (W): %g\n",power->dcache_senseamp);
1431
  fprintf(stderr," tagarray_power (W): %g\n",power->dcache_tagarray);
1432
  fprintf(stderr,"Dtlb_power (W): %g (%.3g%%)\n",power->dtlb,100*power->dtlb/total_power);
1433
  fprintf(stderr,"Level 2 Cache Power Consumption: %g (%.3g%%)\n",dcache2_power,100*dcache2_power/total_power);
1434
  fprintf(stderr," decode_power (W): %g\n",power->dcache2_decoder);
1435
  fprintf(stderr," wordline_power (W): %g\n",power->dcache2_wordline);
1436
  fprintf(stderr," bitline_power (W): %g\n",power->dcache2_bitline);
1437
  fprintf(stderr," senseamp_power (W): %g\n",power->dcache2_senseamp);
1438
  fprintf(stderr," tagarray_power (W): %g\n",power->dcache2_tagarray);
1439
}
1440

    
1441
/*======================================================================*/
1442

    
1443

    
1444

    
1445
/* 
1446
 * This part of the code contains routines for each section as
1447
 * described in the tech report.  See the tech report for more details
1448
 * and explanations */
1449

    
1450
/*----------------------------------------------------------------------*/
1451

    
1452
double driver_size(double driving_cap, double desiredrisetime) {
1453
  double nsize, psize;
1454
  double Rpdrive; 
1455

    
1456
  Rpdrive = desiredrisetime/(driving_cap*log(VSINV)*-1.0);
1457
  psize = restowidth(Rpdrive,PCH);
1458
  nsize = restowidth(Rpdrive,NCH);
1459
  if (psize > Wworddrivemax) {
1460
    psize = Wworddrivemax;
1461
  }
1462
  if (psize < 4.0 * LSCALE)
1463
    psize = 4.0 * LSCALE;
1464

    
1465
  return (psize);
1466

    
1467
}
1468

    
1469
/* Decoder delay:  (see section 6.1 of tech report) */
1470

    
1471
double array_decoder_power(rows,cols,predeclength,rports,wports,cache)
1472
     int rows,cols;
1473
     double predeclength;
1474
     int rports,wports;
1475
     int cache;
1476
{
1477
  double Ctotal=0;
1478
  double Ceq=0;
1479
  int numstack;
1480
  int decode_bits=0;
1481
  int ports;
1482
  double rowsb;
1483

    
1484
  /* read and write ports are the same here */
1485
  ports = rports + wports;
1486

    
1487
  rowsb = (double)rows;
1488

    
1489
  /* number of input bits to be decoded */
1490
  decode_bits=ceil((logtwo(rowsb)));
1491

    
1492
  /* First stage: driving the decoders */
1493

    
1494
  /* This is the capacitance for driving one bit (and its complement).
1495
     -There are #rowsb 3->8 decoders contributing gatecap.
1496
     - 2.0 factor from 2 identical sets of drivers in parallel
1497
  */
1498
  Ceq = 2.0*(draincap(Wdecdrivep,PCH,1)+draincap(Wdecdriven,NCH,1)) +
1499
    gatecap(Wdec3to8n+Wdec3to8p,10.0)*rowsb;
1500

    
1501
  /* There are ports * #decode_bits total */
1502
  Ctotal+=ports*decode_bits*Ceq;
1503

    
1504
  if(verbose)
1505
    fprintf(stderr,"Decoder -- Driving decoders            == %g\n",.3*Ctotal*Powerfactor);
1506

    
1507
  /* second stage: driving a bunch of nor gates with a nand 
1508
     numstack is the size of the nor gates -- ie. a 7-128 decoder has
1509
     3-input NAND followed by 3-input NOR  */
1510

    
1511
  numstack = ceil((1.0/3.0)*logtwo(rows));
1512

    
1513
  if (numstack<=0) numstack = 1;
1514
  if (numstack>5) numstack = 5;
1515

    
1516
  /* There are #rowsb NOR gates being driven*/
1517
  Ceq = (3.0*draincap(Wdec3to8p,PCH,1) +draincap(Wdec3to8n,NCH,3) +
1518
         gatecap(WdecNORn+WdecNORp,((numstack*40)+20.0)))*rowsb;
1519

    
1520
  Ctotal+=ports*Ceq;
1521

    
1522
  if(verbose)
1523
    fprintf(stderr,"Decoder -- Driving nor w/ nand         == %g\n",.3*ports*Ceq*Powerfactor);
1524

    
1525
  /* Final stage: driving an inverter with the nor 
1526
     (inverter preceding wordline driver) -- wordline driver is in the next section*/
1527

    
1528
  Ceq = (gatecap(Wdecinvn+Wdecinvp,20.0)+
1529
         numstack*draincap(WdecNORn,NCH,1)+
1530
         draincap(WdecNORp,PCH,numstack));
1531

    
1532
  if(verbose)
1533
    fprintf(stderr,"Decoder -- Driving inverter w/ nor     == %g\n",.3*ports*Ceq*Powerfactor);
1534

    
1535
  Ctotal+=ports*Ceq;
1536

    
1537
  /* assume Activity Factor == .3  */
1538

    
1539
  return(.3*Ctotal*Powerfactor);
1540
}
1541

    
1542
/*
 * Decoder power for a simple array: forwards to array_decoder_power()
 * with the predecode length fixed at 0.0.  All other arguments are passed
 * through unchanged.
 */
double simple_array_decoder_power(rows,cols,rports,wports,cache)
     int rows,cols;
     int rports,wports;
     int cache;
{
  const double no_predec_length = 0.0;

  return array_decoder_power(rows, cols, no_predec_length,
                             rports, wports, cache);
}
1550

    
1551

    
1552
double array_wordline_power(rows,cols,wordlinelength,rports,wports,cache)
1553
     int rows,cols;
1554
     double wordlinelength;
1555
     int rports,wports;
1556
     int cache;
1557
{
1558
  double Ctotal=0;
1559
  double Ceq=0;
1560
  double Cline=0;
1561
  double Cliner, Clinew=0;
1562
  double desiredrisetime,psize,nsize;
1563
  int ports;
1564
  double colsb;
1565

    
1566
  ports = rports+wports;
1567

    
1568
  colsb = (double)cols;
1569

    
1570
  /* Calculate size of wordline drivers assuming rise time == Period / 8 
1571
     - estimate cap on line 
1572
     - compute min resistance to achieve this with RC 
1573
     - compute width needed to achieve this resistance */
1574

    
1575
  desiredrisetime = Period/16;
1576
  Cline = (gatecappass(Wmemcellr,1.0))*colsb + wordlinelength*CM3metal;
1577
  psize = driver_size(Cline,desiredrisetime);
1578
  
1579
  /* how do we want to do p-n ratioing? -- here we just assume the same ratio 
1580
     from an inverter pair  */
1581
  nsize = psize * Wdecinvn/Wdecinvp; 
1582
  
1583
  if(verbose)
1584
    fprintf(stderr,"Wordline Driver Sizes -- nsize == %f, psize == %f\n",nsize,psize);
1585

    
1586
  Ceq = draincap(Wdecinvn,NCH,1) + draincap(Wdecinvp,PCH,1) +
1587
    gatecap(nsize+psize,20.0);
1588

    
1589
  Ctotal+=ports*Ceq;
1590

    
1591
  if(verbose)
1592
    fprintf(stderr,"Wordline -- Inverter -> Driver         == %g\n",ports*Ceq*Powerfactor);
1593

    
1594
  /* Compute caps of read wordline and write wordlines 
1595
     - wordline driver caps, given computed width from above
1596
     - read wordlines have 1 nmos access tx, size ~4
1597
     - write wordlines have 2 nmos access tx, size ~2
1598
     - metal line cap
1599
  */
1600

    
1601
  Cliner = (gatecappass(Wmemcellr,(BitWidth-2*Wmemcellr)/2.0))*colsb+
1602
    wordlinelength*CM3metal+
1603
    2.0*(draincap(nsize,NCH,1) + draincap(psize,PCH,1));
1604
  Clinew = (2.0*gatecappass(Wmemcellw,(BitWidth-2*Wmemcellw)/2.0))*colsb+
1605
    wordlinelength*CM3metal+
1606
    2.0*(draincap(nsize,NCH,1) + draincap(psize,PCH,1));
1607

    
1608
  if(verbose) {
1609
    fprintf(stderr,"Wordline -- Line                       == %g\n",1e12*Cline);
1610
    fprintf(stderr,"Wordline -- Line -- access -- gatecap  == %g\n",1e12*colsb*2*gatecappass(Wmemcella,(BitWidth-2*Wmemcella)/2.0));
1611
    fprintf(stderr,"Wordline -- Line -- driver -- draincap == %g\n",1e12*draincap(nsize,NCH,1) + draincap(psize,PCH,1));
1612
    fprintf(stderr,"Wordline -- Line -- metal              == %g\n",1e12*wordlinelength*CM3metal);
1613
  }
1614
  Ctotal+=rports*Cliner+wports*Clinew;
1615

    
1616
  /* AF == 1 assuming a different wordline is charged each cycle, but only
1617
     1 wordline (per port) is actually used */
1618

    
1619
  return(Ctotal*Powerfactor);
1620
}
1621

    
1622
double simple_array_wordline_power(rows,cols,rports,wports,cache)
1623
     int rows,cols;
1624
     int rports,wports;
1625
     int cache;
1626
{
1627
  double wordlinelength;
1628
  int ports = rports + wports;
1629
  wordlinelength = cols *  (RegCellWidth + 2 * ports * BitlineSpacing);
1630
  return(array_wordline_power(rows,cols,wordlinelength,rports,wports,cache));
1631
}
1632

    
1633

    
1634
double array_bitline_power(rows,cols,bitlinelength,rports,wports,cache)
1635
     int rows,cols;
1636
     double bitlinelength;
1637
     int rports,wports;
1638
     int cache;
1639
{
1640
  double Ctotal=0;
1641
  double Ccolmux=0;
1642
  double Cbitrowr=0;
1643
  double Cbitroww=0;
1644
  double Cprerow=0;
1645
  double Cwritebitdrive=0;
1646
  double Cpregate=0;
1647
  double Cliner=0;
1648
  double Clinew=0;
1649
  int ports;
1650
  double rowsb;
1651
  double colsb;
1652

    
1653
  double desiredrisetime, Cline, psize, nsize;
1654

    
1655
  ports = rports + wports;
1656

    
1657
  rowsb = (double)rows;
1658
  colsb = (double)cols;
1659

    
1660
  /* Draincaps of access tx's */
1661

    
1662
  Cbitrowr = draincap(Wmemcellr,NCH,1);
1663
  Cbitroww = draincap(Wmemcellw,NCH,1);
1664

    
1665
  /* Cprerow -- precharge cap on the bitline
1666
     -simple scheme to estimate size of pre-charge tx's in a similar fashion
1667
      to wordline driver size estimation.
1668
     -FIXME: it would be better to use precharge/keeper pairs, i've omitted this
1669
      from this version because it couldn't autosize as easily.
1670
  */
1671

    
1672
  desiredrisetime = Period/8;
1673

    
1674
  Cline = rowsb*Cbitrowr+CM2metal*bitlinelength;
1675
  psize = driver_size(Cline,desiredrisetime);
1676

    
1677
  /* compensate for not having an nmos pre-charging */
1678
  psize = psize + psize * Wdecinvn/Wdecinvp; 
1679

    
1680
  if(verbose)
1681
    printf("Cprerow auto   == %g (psize == %g)\n",draincap(psize,PCH,1),psize);
1682

    
1683
  Cprerow = draincap(psize,PCH,1);
1684

    
1685
  /* Cpregate -- cap due to gatecap of precharge transistors -- tack this
1686
     onto bitline cap, again this could have a keeper */
1687
  Cpregate = 4.0*gatecap(psize,10.0);
1688
  global_clockcap+=rports*cols*2.0*Cpregate;
1689

    
1690
  /* Cwritebitdrive -- write bitline drivers are used instead of the precharge
1691
     stuff for write bitlines
1692
     - 2 inverter drivers within each driver pair */
1693

    
1694
  Cline = rowsb*Cbitroww+CM2metal*bitlinelength;
1695

    
1696
  psize = driver_size(Cline,desiredrisetime);
1697
  nsize = psize * Wdecinvn/Wdecinvp; 
1698

    
1699
  Cwritebitdrive = 2.0*(draincap(psize,PCH,1)+draincap(nsize,NCH,1));
1700

    
1701
  /* 
1702
     reg files (cache==0) 
1703
     => single ended bitlines (1 bitline/col)
1704
     => AFs from pop_count
1705
     caches (cache ==1)
1706
     => double-ended bitlines (2 bitlines/col)
1707
     => AFs = .5 (since one of the two bitlines is always charging/discharging)
1708
  */
1709

    
1710
#ifdef STATIC_AF
1711
  if (cache == 0) {
1712
    /* compute the total line cap for read/write bitlines */
1713
    Cliner = rowsb*Cbitrowr+CM2metal*bitlinelength+Cprerow;
1714
    Clinew = rowsb*Cbitroww+CM2metal*bitlinelength+Cwritebitdrive;
1715

    
1716
    /* Bitline inverters at the end of the bitlines (replaced w/ sense amps
1717
       in cache styles) */
1718
    Ccolmux = gatecap(MSCALE*(29.9+7.8),0.0)+gatecap(MSCALE*(47.0+12.0),0.0);
1719
    Ctotal+=(1.0-POPCOUNT_AF)*rports*cols*(Cliner+Ccolmux+2.0*Cpregate);
1720
    Ctotal+=.3*wports*cols*(Clinew+Cwritebitdrive);
1721
  } 
1722
  else { 
1723
    Cliner = rowsb*Cbitrowr+CM2metal*bitlinelength+Cprerow + draincap(Wbitmuxn,NCH,1);
1724
    Clinew = rowsb*Cbitroww+CM2metal*bitlinelength+Cwritebitdrive;
1725
    Ccolmux = (draincap(Wbitmuxn,NCH,1))+2.0*gatecap(WsenseQ1to4,10.0);
1726
    Ctotal+=.5*rports*2.0*cols*(Cliner+Ccolmux+2.0*Cpregate);
1727
    Ctotal+=.5*wports*2.0*cols*(Clinew+Cwritebitdrive);
1728
  }
1729
#else
1730
  if (cache == 0) {
1731
    /* compute the total line cap for read/write bitlines */
1732
    Cliner = rowsb*Cbitrowr+CM2metal*bitlinelength+Cprerow;
1733
    Clinew = rowsb*Cbitroww+CM2metal*bitlinelength+Cwritebitdrive;
1734

    
1735
    /* Bitline inverters at the end of the bitlines (replaced w/ sense amps
1736
       in cache styles) */
1737
    Ccolmux = gatecap(MSCALE*(29.9+7.8),0.0)+gatecap(MSCALE*(47.0+12.0),0.0);
1738
    Ctotal += rports*cols*(Cliner+Ccolmux+2.0*Cpregate);
1739
    Ctotal += .3*wports*cols*(Clinew+Cwritebitdrive);
1740
  } 
1741
  else { 
1742
    Cliner = rowsb*Cbitrowr+CM2metal*bitlinelength+Cprerow + draincap(Wbitmuxn,NCH,1);
1743
    Clinew = rowsb*Cbitroww+CM2metal*bitlinelength+Cwritebitdrive;
1744
    Ccolmux = (draincap(Wbitmuxn,NCH,1))+2.0*gatecap(WsenseQ1to4,10.0);
1745
    Ctotal+=.5*rports*2.0*cols*(Cliner+Ccolmux+2.0*Cpregate);
1746
    Ctotal+=.5*wports*2.0*cols*(Clinew+Cwritebitdrive);
1747
  }
1748
#endif
1749

    
1750
  if(verbose) {
1751
    fprintf(stderr,"Bitline -- Precharge                   == %g\n",1e12*Cpregate);
1752
    fprintf(stderr,"Bitline -- Line                        == %g\n",1e12*(Cliner+Clinew));
1753
    fprintf(stderr,"Bitline -- Line -- access draincap     == %g\n",1e12*rowsb*Cbitrowr);
1754
    fprintf(stderr,"Bitline -- Line -- precharge draincap  == %g\n",1e12*Cprerow);
1755
    fprintf(stderr,"Bitline -- Line -- metal               == %g\n",1e12*bitlinelength*CM2metal);
1756
    fprintf(stderr,"Bitline -- Colmux                      == %g\n",1e12*Ccolmux);
1757

    
1758
    fprintf(stderr,"\n");
1759
  }
1760

    
1761

    
1762
  if(cache==0)
1763
    return(Ctotal*Powerfactor);
1764
  else
1765
    return(Ctotal*SensePowerfactor*.4);
1766
  
1767
}
1768

    
1769

    
1770
double simple_array_bitline_power(rows,cols,rports,wports,cache)
1771
     int rows,cols;
1772
     int rports,wports;
1773
     int cache;
1774
{
1775
  double bitlinelength;
1776

    
1777
  int ports = rports + wports;
1778

    
1779
  bitlinelength = rows * (RegCellHeight + ports * WordlineSpacing);
1780

    
1781
  return (array_bitline_power(rows,cols,bitlinelength,rports,wports,cache));
1782

    
1783
}
1784

    
1785
/* estimate senseamp power dissipation in cache structures (Zyuban's method) */
1786
double senseamp_power(int cols)
1787
{
1788
  return((double)cols * Vdd/8 * .5e-3);
1789
}
1790

    
1791
/* estimate comparator power consumption (this comparator is similar
1792
   to the tag-match structure in a CAM */
1793
double compare_cap(int compare_bits)
1794
{
1795
  double c1, c2;
1796
  /* bottom part of comparator */
1797
  c2 = (compare_bits)*(draincap(Wcompn,NCH,1)+draincap(Wcompn,NCH,2))+
1798
    draincap(Wevalinvp,PCH,1) + draincap(Wevalinvn,NCH,1);
1799

    
1800
  /* top part of comparator */
1801
  c1 = (compare_bits)*(draincap(Wcompn,NCH,1)+draincap(Wcompn,NCH,2)+
1802
                       draincap(Wcomppreequ,NCH,1)) +
1803
    gatecap(WdecNORn,1.0)+
1804
    gatecap(WdecNORp,3.0);
1805

    
1806
  return(c1 + c2);
1807
}
1808

    
1809
/* power of depency check logic */
1810
double dcl_compare_power(int compare_bits)
1811
{
1812
  double Ctotal;
1813
  int num_comparators;
1814
  
1815
  num_comparators = (ruu_decode_width - 1) * (ruu_decode_width);
1816

    
1817
  Ctotal = num_comparators * compare_cap(compare_bits);
1818

    
1819
  return(Ctotal*Powerfactor*AF);
1820
}
1821

    
1822
/*
 * Total power of a simple array: decoder + wordline + bitline power, plus
 * senseamp power when `cache` is non-zero.
 */
double simple_array_power(rows,cols,rports,wports,cache)
     int rows,cols;
     int rports,wports;
     int cache;
{
  double array_total;

  array_total = simple_array_decoder_power(rows,cols,rports,wports,cache)+
    simple_array_wordline_power(rows,cols,rports,wports,cache)+
    simple_array_bitline_power(rows,cols,rports,wports,cache);

  /* only cache-style arrays carry sense amplifiers */
  if (cache != 0)
    array_total = array_total + senseamp_power(cols);

  return array_total;
}
1837

    
1838

    
1839
double cam_tagdrive(rows,cols,rports,wports)
1840
     int rows,cols,rports,wports;
1841
{
1842
  double Ctotal, Ctlcap, Cblcap, Cwlcap;
1843
  double taglinelength;
1844
  double wordlinelength;
1845
  double nsize, psize;
1846
  int ports;
1847
  Ctotal=0;
1848

    
1849
  ports = rports + wports;
1850

    
1851
  taglinelength = rows * 
1852
    (CamCellHeight + ports * MatchlineSpacing);
1853

    
1854
  wordlinelength = cols * 
1855
    (CamCellWidth + ports * TaglineSpacing);
1856

    
1857
  /* Compute tagline cap */
1858
  Ctlcap = Cmetal * taglinelength + 
1859
    rows * gatecappass(Wcomparen2,2.0) +
1860
    draincap(Wcompdrivern,NCH,1)+draincap(Wcompdriverp,PCH,1);
1861

    
1862
  /* Compute bitline cap (for writing new tags) */
1863
  Cblcap = Cmetal * taglinelength +
1864
    rows * draincap(Wmemcellr,NCH,2);
1865

    
1866
  /* autosize wordline driver */
1867
  psize = driver_size(Cmetal * wordlinelength + 2 * cols * gatecap(Wmemcellr,2.0),Period/8);
1868
  nsize = psize * Wdecinvn/Wdecinvp; 
1869

    
1870
  /* Compute wordline cap (for writing new tags) */
1871
  Cwlcap = Cmetal * wordlinelength + 
1872
    draincap(nsize,NCH,1)+draincap(psize,PCH,1) +
1873
    2 * cols * gatecap(Wmemcellr,2.0);
1874
    
1875
  Ctotal += (rports * cols * 2 * Ctlcap) + 
1876
    (wports * ((cols * 2 * Cblcap) + (rows * Cwlcap)));
1877

    
1878
  return(Ctotal*Powerfactor*AF);
1879
}
1880

    
1881
double cam_tagmatch(rows,cols,rports,wports)
1882
     int rows,cols,rports,wports;
1883
{
1884
  double Ctotal, Cmlcap;
1885
  double matchlinelength;
1886
  int ports;
1887
  Ctotal=0;
1888

    
1889
  ports = rports + wports;
1890

    
1891
  matchlinelength = cols * 
1892
    (CamCellWidth + ports * TaglineSpacing);
1893

    
1894
  Cmlcap = 2 * cols * draincap(Wcomparen1,NCH,2) + 
1895
    Cmetal * matchlinelength + draincap(Wmatchpchg,NCH,1) +
1896
    gatecap(Wmatchinvn+Wmatchinvp,10.0) +
1897
    gatecap(Wmatchnandn+Wmatchnandp,10.0);
1898

    
1899
  Ctotal += rports * rows * Cmlcap;
1900

    
1901
  global_clockcap += rports * rows * gatecap(Wmatchpchg,5.0);
1902
  
1903
  /* noring the nanded match lines */
1904
  if(ruu_issue_width >= 8)
1905
    Ctotal += 2 * gatecap(Wmatchnorn+Wmatchnorp,10.0);
1906

    
1907
  return(Ctotal*Powerfactor*AF);
1908
}
1909

    
1910
/*
 * Total CAM power: the sum of the tag-drive and tag-match estimates for
 * the same geometry and port counts.
 */
double cam_array(rows,cols,rports,wports)
     int rows,cols,rports,wports;
{
  double cam_total;

  cam_total = cam_tagdrive(rows,cols,rports,wports) +
    cam_tagmatch(rows,cols,rports,wports);

  return cam_total;
}
1916

    
1917

    
1918
double selection_power(int win_entries)
1919
{
1920
  double Ctotal, Cor, Cpencode;
1921
  int num_arbiter=1;
1922

    
1923
  Ctotal=0;
1924

    
1925
  while(win_entries > 4)
1926
    {
1927
      win_entries = (int)ceil((double)win_entries / 4.0);
1928
      num_arbiter += win_entries;
1929
    }
1930

    
1931
  Cor = 4 * draincap(WSelORn,NCH,1) + draincap(WSelORprequ,PCH,1);
1932

    
1933
  Cpencode = draincap(WSelPn,NCH,1) + draincap(WSelPp,PCH,1) + 
1934
    2*draincap(WSelPn,NCH,1) + draincap(WSelPp,PCH,2) + 
1935
    3*draincap(WSelPn,NCH,1) + draincap(WSelPp,PCH,3) + 
1936
    4*draincap(WSelPn,NCH,1) + draincap(WSelPp,PCH,4) + 
1937
    4*gatecap(WSelEnn+WSelEnp,20.0) + 
1938
    4*draincap(WSelEnn,NCH,1) + 4*draincap(WSelEnp,PCH,1);
1939

    
1940
  Ctotal += ruu_issue_width * num_arbiter*(Cor+Cpencode);
1941

    
1942
  return(Ctotal*Powerfactor*AF);
1943
}
1944

    
1945
/* very rough clock power estimates */
1946
double total_clockpower(double die_length)
1947
{
1948

    
1949
  double clocklinelength;
1950
  double Cline,Cline2,Ctotal;
1951
  double pipereg_clockcap=0;
1952
  double global_buffercap = 0;
1953
  double Clockpower;
1954

    
1955
  double num_piperegs;
1956

    
1957
  int npreg_width = (int)ceil(logtwo((double)RUU_size));
1958

    
1959
  /* Assume say 8 stages (kinda low now).
1960
     FIXME: this could be a lot better; user could input
1961
     number of pipestages, etc  */
1962

    
1963
  /* assume 8 pipe stages and try to estimate bits per pipe stage */
1964
  /* pipe stage 0/1 */
1965
  num_piperegs = ruu_issue_width*inst_length + data_width;
1966
  /* pipe stage 1/2 */
1967
  num_piperegs += ruu_issue_width*(inst_length + 3 * RUU_size);
1968
  /* pipe stage 2/3 */
1969
  num_piperegs += ruu_issue_width*(inst_length + 3 * RUU_size);
1970
  /* pipe stage 3/4 */
1971
  num_piperegs += ruu_issue_width*(3 * npreg_width + pow2(opcode_length));
1972
  /* pipe stage 4/5 */
1973
  num_piperegs += ruu_issue_width*(2*data_width + pow2(opcode_length));
1974
  /* pipe stage 5/6 */
1975
  num_piperegs += ruu_issue_width*(data_width + pow2(opcode_length));
1976
  /* pipe stage 6/7 */
1977
  num_piperegs += ruu_issue_width*(data_width + pow2(opcode_length));
1978
  /* pipe stage 7/8 */
1979
  num_piperegs += ruu_issue_width*(data_width + pow2(opcode_length));
1980

    
1981
  /* assume 50% extra in control signals (rule of thumb) */
1982
  num_piperegs = num_piperegs * 1.5;
1983

    
1984
  pipereg_clockcap = num_piperegs * 4*gatecap(10.0,0);
1985

    
1986
  /* estimate based on 3% of die being in clock metal */
1987
  Cline2 = Cmetal * (.03 * die_length * die_length/BitlineSpacing) * 1e6 * 1e6;
1988

    
1989
  /* another estimate */
1990
  clocklinelength = die_length*(.5 + 4 * (.25 + 2*(.25) + 4 * (.125)));
1991
  Cline = 20 * Cmetal * (clocklinelength) * 1e6;
1992
  global_buffercap = 12*gatecap(1000.0,10.0)+16*gatecap(200,10.0)+16*8*2*gatecap(100.0,10.00) + 2*gatecap(.29*1e6,10.0);
1993
  /* global_clockcap is computed within each array structure for pre-charge tx's*/
1994
  Ctotal = Cline+global_clockcap+pipereg_clockcap+global_buffercap;
1995

    
1996
  if(verbose)
1997
    fprintf(stderr,"num_piperegs == %f\n",num_piperegs);
1998

    
1999
  /* add I_ADD Clockcap and F_ADD Clockcap */
2000
  Clockpower = Ctotal*Powerfactor + res_ialu*I_ADD_CLOCK + res_fpalu*F_ADD_CLOCK;
2001

    
2002
  if(verbose) {
2003
    fprintf(stderr,"Global Clock Power: %g\n",Clockpower);
2004
    fprintf(stderr," Global Metal Lines   (W): %g\n",Cline*Powerfactor);
2005
    fprintf(stderr," Global Metal Lines (3%%) (W): %g\n",Cline2*Powerfactor);
2006
    fprintf(stderr," Global Clock Buffers (W): %g\n",global_buffercap*Powerfactor);
2007
    fprintf(stderr," Global Clock Cap (Explicit) (W): %g\n",global_clockcap*Powerfactor+I_ADD_CLOCK+F_ADD_CLOCK);
2008
    fprintf(stderr," Global Clock Cap (Implicit) (W): %g\n",pipereg_clockcap*Powerfactor);
2009
  }
2010
  return(Clockpower);
2011

    
2012
}
2013

    
2014
/* very rough global clock power estimates */
2015
double global_clockpower(double die_length)
2016
{
2017

    
2018
  double clocklinelength;
2019
  double Cline,Cline2,Ctotal;
2020
  double global_buffercap = 0;
2021

    
2022
  Cline2 = Cmetal * (.03 * die_length * die_length/BitlineSpacing) * 1e6 * 1e6;
2023

    
2024
  clocklinelength = die_length*(.5 + 4 * (.25 + 2*(.25) + 4 * (.125)));
2025
  Cline = 20 * Cmetal * (clocklinelength) * 1e6;
2026
  global_buffercap = 12*gatecap(1000.0,10.0)+16*gatecap(200,10.0)+16*8*2*gatecap(100.0,10.00) + 2*gatecap(.29*1e6,10.0);
2027
  Ctotal = Cline+global_buffercap;
2028

    
2029
  if(verbose) {
2030
    fprintf(stderr,"Global Clock Power: %g\n",Ctotal*Powerfactor);
2031
    fprintf(stderr," Global Metal Lines   (W): %g\n",Cline*Powerfactor);
2032
    fprintf(stderr," Global Metal Lines (3%%) (W): %g\n",Cline2*Powerfactor);
2033
    fprintf(stderr," Global Clock Buffers (W): %g\n",global_buffercap*Powerfactor);
2034
  }
2035

    
2036
  return(Ctotal*Powerfactor);
2037

    
2038
}
2039

    
2040

    
2041
double compute_resultbus_power()
2042
{
2043
  double Ctotal, Cline;
2044

    
2045
  double regfile_height;
2046

    
2047
  /* compute size of result bus tags */
2048
  int npreg_width = (int)ceil(logtwo((double)RUU_size));
2049

    
2050
  Ctotal=0;
2051

    
2052
  regfile_height = RUU_size * (RegCellHeight + 
2053
                               WordlineSpacing * 3 * ruu_issue_width); 
2054

    
2055
  /* assume num alu's == ialu  (FIXME: generate a more detailed result bus network model*/
2056
  Cline = Cmetal * (regfile_height + .5 * res_ialu * 3200.0 * LSCALE);
2057

    
2058
  /* or use result bus length measured from 21264 die photo */
2059
  /*  Cline = Cmetal * 3.3*1000;*/
2060

    
2061
  /* Assume ruu_issue_width result busses -- power can be scaled linearly
2062
     for number of result busses (scale by writeback_access) */
2063
  Ctotal += 2.0 * (data_width + npreg_width) * (ruu_issue_width)* Cline;
2064

    
2065
#ifdef STATIC_AF
2066
  return(Ctotal*Powerfactor*AF);
2067
#else
2068
  return(Ctotal*Powerfactor);
2069
#endif
2070
  
2071
}
2072

    
2073
void calculate_power(power)
2074
     power_result_type *power;
2075
{
2076
  double clockpower;
2077
  double predeclength, wordlinelength, bitlinelength;
2078
  int ndwl, ndbl, nspd, ntwl, ntbl, ntspd, c,b,a,cache, rowsb, colsb;
2079
  int trowsb, tcolsb, tagsize;
2080
  int va_size = 48;
2081

    
2082
  int npreg_width = (int)ceil(logtwo((double)RUU_size));
2083

    
2084
  /* these variables are needed to use Cacti to auto-size cache arrays 
2085
     (for optimal delay) */
2086
  time_result_type time_result;
2087
  time_parameter_type time_parameters;
2088

    
2089
  /* used to autosize other structures, like bpred tables */
2090
  int scale_factor;
2091

    
2092
  global_clockcap = 0;
2093

    
2094
  cache=0;
2095

    
2096

    
2097
  /* FIXME: ALU power is a simple constant, it would be better
2098
     to include bit AFs and have different numbers for different
2099
     types of operations */
2100
  power->ialu_power = res_ialu * I_ADD;
2101
  power->falu_power = res_fpalu * F_ADD;
2102

    
2103
  nvreg_width = (int)ceil(logtwo((double)MD_NUM_IREGS));
2104
  npreg_width = (int)ceil(logtwo((double)RUU_size));
2105

    
2106

    
2107
  /* RAT has shadow bits stored in each cell, this makes the
2108
     cell size larger than normal array structures, so we must
2109
     compute it here */
2110

    
2111
  predeclength = MD_NUM_IREGS * 
2112
    (RatCellHeight + 3 * ruu_decode_width * WordlineSpacing);
2113

    
2114
  wordlinelength = npreg_width * 
2115
    (RatCellWidth + 
2116
     6 * ruu_decode_width * BitlineSpacing + 
2117
     RatShiftRegWidth*RatNumShift);
2118

    
2119
  bitlinelength = MD_NUM_IREGS * (RatCellHeight + 3 * ruu_decode_width * WordlineSpacing);
2120

    
2121
  if(verbose)
2122
    fprintf(stderr,"rat power stats\n");
2123
  power->rat_decoder = array_decoder_power(MD_NUM_IREGS,npreg_width,predeclength,2*ruu_decode_width,ruu_decode_width,cache);
2124
  power->rat_wordline = array_wordline_power(MD_NUM_IREGS,npreg_width,wordlinelength,2*ruu_decode_width,ruu_decode_width,cache);
2125
  power->rat_bitline = array_bitline_power(MD_NUM_IREGS,npreg_width,bitlinelength,2*ruu_decode_width,ruu_decode_width,cache);
2126
  power->rat_senseamp = 0;
2127

    
2128
  power->dcl_compare = dcl_compare_power(nvreg_width);
2129
  power->dcl_pencode = 0;
2130
  power->inst_decoder_power = ruu_decode_width * simple_array_decoder_power(opcode_length,1,1,1,cache);
2131
  power->wakeup_tagdrive =cam_tagdrive(RUU_size,npreg_width,ruu_issue_width,ruu_issue_width);
2132
  power->wakeup_tagmatch =cam_tagmatch(RUU_size,npreg_width,ruu_issue_width,ruu_issue_width);
2133
  power->wakeup_ormatch =0; 
2134

    
2135
  power->selection = selection_power(RUU_size);
2136

    
2137

    
2138
  predeclength = MD_NUM_IREGS * (RegCellHeight + 3 * ruu_issue_width * WordlineSpacing);
2139

    
2140
  wordlinelength = data_width * 
2141
    (RegCellWidth + 
2142
     6 * ruu_issue_width * BitlineSpacing);
2143

    
2144
  bitlinelength = MD_NUM_IREGS * (RegCellHeight + 3 * ruu_issue_width * WordlineSpacing);
2145

    
2146
  if(verbose)
2147
    fprintf(stderr,"regfile power stats\n");
2148

    
2149
  power->regfile_decoder = array_decoder_power(MD_NUM_IREGS,data_width,predeclength,2*ruu_issue_width,ruu_issue_width,cache);
2150
  power->regfile_wordline = array_wordline_power(MD_NUM_IREGS,data_width,wordlinelength,2*ruu_issue_width,ruu_issue_width,cache);
2151
  power->regfile_bitline = array_bitline_power(MD_NUM_IREGS,data_width,bitlinelength,2*ruu_issue_width,ruu_issue_width,cache);
2152
  power->regfile_senseamp =0;
2153

    
2154
  predeclength = RUU_size * (RegCellHeight + 3 * ruu_issue_width * WordlineSpacing);
2155

    
2156
  wordlinelength = data_width * 
2157
    (RegCellWidth + 
2158
     6 * ruu_issue_width * BitlineSpacing);
2159

    
2160
  bitlinelength = RUU_size * (RegCellHeight + 3 * ruu_issue_width * WordlineSpacing);
2161

    
2162
  if(verbose)
2163
    fprintf(stderr,"res station power stats\n");
2164
  power->rs_decoder = array_decoder_power(RUU_size,data_width,predeclength,2*ruu_issue_width,ruu_issue_width,cache);
2165
  power->rs_wordline = array_wordline_power(RUU_size,data_width,wordlinelength,2*ruu_issue_width,ruu_issue_width,cache);
2166
  power->rs_bitline = array_bitline_power(RUU_size,data_width,bitlinelength,2*ruu_issue_width,ruu_issue_width,cache);
2167
  /* no senseamps in reg file structures (only caches) */
2168
  power->rs_senseamp =0;
2169

    
2170
  /* addresses go into lsq tag's */
2171
  power->lsq_wakeup_tagdrive =cam_tagdrive(LSQ_size,data_width,res_memport,res_memport);
2172
  power->lsq_wakeup_tagmatch =cam_tagmatch(LSQ_size,data_width,res_memport,res_memport);
2173
  power->lsq_wakeup_ormatch =0; 
2174

    
2175
  wordlinelength = data_width * 
2176
    (RegCellWidth + 
2177
     4 * res_memport * BitlineSpacing);
2178

    
2179
  bitlinelength = RUU_size * (RegCellHeight + 4 * res_memport * WordlineSpacing);
2180

    
2181
  /* rs's hold data */
2182
  if(verbose)
2183
    fprintf(stderr,"lsq station power stats\n");
2184
  power->lsq_rs_decoder = array_decoder_power(LSQ_size,data_width,predeclength,res_memport,res_memport,cache);
2185
  power->lsq_rs_wordline = array_wordline_power(LSQ_size,data_width,wordlinelength,res_memport,res_memport,cache);
2186
  power->lsq_rs_bitline = array_bitline_power(LSQ_size,data_width,bitlinelength,res_memport,res_memport,cache);
2187
  power->lsq_rs_senseamp =0;
2188

    
2189
  power->resultbus = compute_resultbus_power();
2190

    
2191
  /* Load cache values into what cacti is expecting */
2192
  time_parameters.cache_size = btb_config[0] * (data_width/8) * btb_config[1]; /* C */
2193
  time_parameters.block_size = (data_width/8); /* B */
2194
  time_parameters.associativity = btb_config[1]; /* A */
2195
  time_parameters.number_of_sets = btb_config[0]; /* C/(B*A) */
2196

    
2197
  /* have Cacti compute optimal cache config */
2198
  calculate_time(&time_result,&time_parameters);
2199
  output_data(&time_result,&time_parameters);
2200

    
2201
  /* extract Cacti results */
2202
  ndwl=time_result.best_Ndwl;
2203
  ndbl=time_result.best_Ndbl;
2204
  nspd=time_result.best_Nspd;
2205
  ntwl=time_result.best_Ntwl;
2206
  ntbl=time_result.best_Ntbl;
2207
  ntspd=time_result.best_Ntspd;
2208
  c = time_parameters.cache_size;
2209
  b = time_parameters.block_size;
2210
  a = time_parameters.associativity; 
2211

    
2212
  cache=1;
2213

    
2214
  /* Figure out how many rows/cols there are now */
2215
  rowsb = c/(b*a*ndbl*nspd);
2216
  colsb = 8*b*a*nspd/ndwl;
2217

    
2218
  if(verbose) {
2219
    fprintf(stderr,"%d KB %d-way btb (%d-byte block size):\n",c,a,b);
2220
    fprintf(stderr,"ndwl == %d, ndbl == %d, nspd == %d\n",ndwl,ndbl,nspd);
2221
    fprintf(stderr,"%d sets of %d rows x %d cols\n",ndwl*ndbl,rowsb,colsb);
2222
  }
2223

    
2224
  predeclength = rowsb * (RegCellHeight + WordlineSpacing);
2225
  wordlinelength = colsb *  (RegCellWidth + BitlineSpacing);
2226
  bitlinelength = rowsb * (RegCellHeight + WordlineSpacing);
2227

    
2228
  if(verbose)
2229
    fprintf(stderr,"btb power stats\n");
2230
  power->btb = ndwl*ndbl*(array_decoder_power(rowsb,colsb,predeclength,1,1,cache) + array_wordline_power(rowsb,colsb,wordlinelength,1,1,cache) + array_bitline_power(rowsb,colsb,bitlinelength,1,1,cache) + senseamp_power(colsb));
2231

    
2232
  cache=1;
2233

    
2234
  scale_factor = squarify(twolev_config[0],twolev_config[2]);
2235
  predeclength = (twolev_config[0] / scale_factor)* (RegCellHeight + WordlineSpacing);
2236
  wordlinelength = twolev_config[2] * scale_factor *  (RegCellWidth + BitlineSpacing);
2237
  bitlinelength = (twolev_config[0] / scale_factor) * (RegCellHeight + WordlineSpacing);
2238

    
2239
  if(verbose)
2240
    fprintf(stderr,"local predict power stats\n");
2241

    
2242
  power->local_predict = array_decoder_power(twolev_config[0]/scale_factor,twolev_config[2]*scale_factor,predeclength,1,1,cache) + array_wordline_power(twolev_config[0]/scale_factor,twolev_config[2]*scale_factor,wordlinelength,1,1,cache) + array_bitline_power(twolev_config[0]/scale_factor,twolev_config[2]*scale_factor,bitlinelength,1,1,cache) + senseamp_power(twolev_config[2]*scale_factor);
2243

    
2244
  scale_factor = squarify(twolev_config[1],3);
2245

    
2246
  predeclength = (twolev_config[1] / scale_factor)* (RegCellHeight + WordlineSpacing);
2247
  wordlinelength = 3 * scale_factor *  (RegCellWidth + BitlineSpacing);
2248
  bitlinelength = (twolev_config[1] / scale_factor) * (RegCellHeight + WordlineSpacing);
2249

    
2250

    
2251
  if(verbose)
2252
    fprintf(stderr,"local predict power stats\n");
2253
  power->local_predict += array_decoder_power(twolev_config[1]/scale_factor,3*scale_factor,predeclength,1,1,cache) + array_wordline_power(twolev_config[1]/scale_factor,3*scale_factor,wordlinelength,1,1,cache) + array_bitline_power(twolev_config[1]/scale_factor,3*scale_factor,bitlinelength,1,1,cache) + senseamp_power(3*scale_factor);
2254

    
2255
  if(verbose)
2256
    fprintf(stderr,"bimod_config[0] == %d\n",bimod_config[0]);
2257

    
2258
  scale_factor = squarify(bimod_config[0],2);
2259

    
2260
  predeclength = bimod_config[0]/scale_factor * (RegCellHeight + WordlineSpacing);
2261
  wordlinelength = 2*scale_factor *  (RegCellWidth + BitlineSpacing);
2262
  bitlinelength = bimod_config[0]/scale_factor * (RegCellHeight + WordlineSpacing);
2263

    
2264

    
2265
  if(verbose)
2266
    fprintf(stderr,"global predict power stats\n");
2267
  power->global_predict = array_decoder_power(bimod_config[0]/scale_factor,2*scale_factor,predeclength,1,1,cache) + array_wordline_power(bimod_config[0]/scale_factor,2*scale_factor,wordlinelength,1,1,cache) + array_bitline_power(bimod_config[0]/scale_factor,2*scale_factor,bitlinelength,1,1,cache) + senseamp_power(2*scale_factor);
2268

    
2269
  scale_factor = squarify(comb_config[0],2);
2270

    
2271
  predeclength = comb_config[0]/scale_factor * (RegCellHeight + WordlineSpacing);
2272
  wordlinelength = 2*scale_factor *  (RegCellWidth + BitlineSpacing);
2273
  bitlinelength = comb_config[0]/scale_factor * (RegCellHeight + WordlineSpacing);
2274

    
2275
  if(verbose)
2276
    fprintf(stderr,"chooser predict power stats\n");
2277
  power->chooser = array_decoder_power(comb_config[0]/scale_factor,2*scale_factor,predeclength,1,1,cache) + array_wordline_power(comb_config[0]/scale_factor,2*scale_factor,wordlinelength,1,1,cache) + array_bitline_power(comb_config[0]/scale_factor,2*scale_factor,bitlinelength,1,1,cache) + senseamp_power(2*scale_factor);
2278

    
2279
  if(verbose)
2280
    fprintf(stderr,"RAS predict power stats\n");
2281
  power->ras = simple_array_power(ras_size,data_width,1,1,0);
2282

    
2283
  tagsize = va_size - ((int)logtwo(cache_dl1->nsets) + (int)logtwo(cache_dl1->bsize));
2284

    
2285
  if(verbose)
2286
    fprintf(stderr,"dtlb predict power stats\n");
2287
  power->dtlb = res_memport*(cam_array(dtlb->nsets, va_size - (int)logtwo((double)dtlb->bsize),1,1) + simple_array_power(dtlb->nsets,tagsize,1,1,cache));
2288

    
2289
  tagsize = va_size - ((int)logtwo(cache_il1->nsets) + (int)logtwo(cache_il1->bsize));
2290

    
2291
  predeclength = itlb->nsets * (RegCellHeight + WordlineSpacing);
2292
  wordlinelength = logtwo((double)itlb->bsize) * (RegCellWidth + BitlineSpacing);
2293
  bitlinelength = itlb->nsets * (RegCellHeight + WordlineSpacing);
2294

    
2295
  if(verbose)
2296
    fprintf(stderr,"itlb predict power stats\n");
2297
  power->itlb = cam_array(itlb->nsets, va_size - (int)logtwo((double)itlb->bsize),1,1) + simple_array_power(itlb->nsets,tagsize,1,1,cache);
2298

    
2299

    
2300
  cache=1;
2301

    
2302
  time_parameters.cache_size = cache_il1->nsets * cache_il1->bsize * cache_il1->assoc; /* C */
2303
  time_parameters.block_size = cache_il1->bsize; /* B */
2304
  time_parameters.associativity = cache_il1->assoc; /* A */
2305
  time_parameters.number_of_sets = cache_il1->nsets; /* C/(B*A) */
2306

    
2307
  calculate_time(&time_result,&time_parameters);
2308
  output_data(&time_result,&time_parameters);
2309

    
2310
  ndwl=time_result.best_Ndwl;
2311
  ndbl=time_result.best_Ndbl;
2312
  nspd=time_result.best_Nspd;
2313
  ntwl=time_result.best_Ntwl;
2314
  ntbl=time_result.best_Ntbl;
2315
  ntspd=time_result.best_Ntspd;
2316

    
2317
  c = time_parameters.cache_size;
2318
  b = time_parameters.block_size;
2319
  a = time_parameters.associativity;
2320

    
2321
  rowsb = c/(b*a*ndbl*nspd);
2322
  colsb = 8*b*a*nspd/ndwl;
2323

    
2324
  tagsize = va_size - ((int)logtwo(cache_il1->nsets) + (int)logtwo(cache_il1->bsize));
2325
  trowsb = c/(b*a*ntbl*ntspd);
2326
  tcolsb = a * (tagsize + 1 + 6) * ntspd/ntwl;
2327
 
2328
  if(verbose) {
2329
    fprintf(stderr,"%d KB %d-way cache (%d-byte block size):\n",c,a,b);
2330
    fprintf(stderr,"ndwl == %d, ndbl == %d, nspd == %d\n",ndwl,ndbl,nspd);
2331
    fprintf(stderr,"%d sets of %d rows x %d cols\n",ndwl*ndbl,rowsb,colsb);
2332
    fprintf(stderr,"tagsize == %d\n",tagsize);
2333
  }
2334

    
2335
  predeclength = rowsb * (RegCellHeight + WordlineSpacing);
2336
  wordlinelength = colsb *  (RegCellWidth + BitlineSpacing);
2337
  bitlinelength = rowsb * (RegCellHeight + WordlineSpacing);
2338

    
2339
  if(verbose)
2340
    fprintf(stderr,"icache power stats\n");
2341
  power->icache_decoder = ndwl*ndbl*array_decoder_power(rowsb,colsb,predeclength,1,1,cache);
2342
  power->icache_wordline = ndwl*ndbl*array_wordline_power(rowsb,colsb,wordlinelength,1,1,cache);
2343
  power->icache_bitline = ndwl*ndbl*array_bitline_power(rowsb,colsb,bitlinelength,1,1,cache);
2344
  power->icache_senseamp = ndwl*ndbl*senseamp_power(colsb);
2345
  power->icache_tagarray = ntwl*ntbl*(simple_array_power(trowsb,tcolsb,1,1,cache));
2346

    
2347
  power->icache_power = power->icache_decoder + power->icache_wordline + power->icache_bitline + power->icache_senseamp + power->icache_tagarray;
2348

    
2349
  time_parameters.cache_size = cache_dl1->nsets * cache_dl1->bsize * cache_dl1->assoc; /* C */
2350
  time_parameters.block_size = cache_dl1->bsize; /* B */
2351
  time_parameters.associativity = cache_dl1->assoc; /* A */
2352
  time_parameters.number_of_sets = cache_dl1->nsets; /* C/(B*A) */
2353

    
2354
  calculate_time(&time_result,&time_parameters);
2355
  output_data(&time_result,&time_parameters);
2356

    
2357
  ndwl=time_result.best_Ndwl;
2358
  ndbl=time_result.best_Ndbl;
2359
  nspd=time_result.best_Nspd;
2360
  ntwl=time_result.best_Ntwl;
2361
  ntbl=time_result.best_Ntbl;
2362
  ntspd=time_result.best_Ntspd;
2363
  c = time_parameters.cache_size;
2364
  b = time_parameters.block_size;
2365
  a = time_parameters.associativity; 
2366

    
2367
  cache=1;
2368

    
2369
  rowsb = c/(b*a*ndbl*nspd);
2370
  colsb = 8*b*a*nspd/ndwl;
2371

    
2372
  tagsize = va_size - ((int)logtwo(cache_dl1->nsets) + (int)logtwo(cache_dl1->bsize));
2373
  trowsb = c/(b*a*ntbl*ntspd);
2374
  tcolsb = a * (tagsize + 1 + 6) * ntspd/ntwl;
2375

    
2376
  if(verbose) {
2377
    fprintf(stderr,"%d KB %d-way cache (%d-byte block size):\n",c,a,b);
2378
    fprintf(stderr,"ndwl == %d, ndbl == %d, nspd == %d\n",ndwl,ndbl,nspd);
2379
    fprintf(stderr,"%d sets of %d rows x %d cols\n",ndwl*ndbl,rowsb,colsb);
2380
    fprintf(stderr,"tagsize == %d\n",tagsize);
2381

    
2382
    fprintf(stderr,"\nntwl == %d, ntbl == %d, ntspd == %d\n",ntwl,ntbl,ntspd);
2383
    fprintf(stderr,"%d sets of %d rows x %d cols\n",ntwl*ntbl,trowsb,tcolsb);
2384
  }
2385

    
2386
  predeclength = rowsb * (RegCellHeight + WordlineSpacing);
2387
  wordlinelength = colsb *  (RegCellWidth + BitlineSpacing);
2388
  bitlinelength = rowsb * (RegCellHeight + WordlineSpacing);
2389

    
2390
  if(verbose)
2391
    fprintf(stderr,"dcache power stats\n");
2392
  power->dcache_decoder = res_memport*ndwl*ndbl*array_decoder_power(rowsb,colsb,predeclength,1,1,cache);
2393
  power->dcache_wordline = res_memport*ndwl*ndbl*array_wordline_power(rowsb,colsb,wordlinelength,1,1,cache);
2394
  power->dcache_bitline = res_memport*ndwl*ndbl*array_bitline_power(rowsb,colsb,bitlinelength,1,1,cache);
2395
  power->dcache_senseamp = res_memport*ndwl*ndbl*senseamp_power(colsb);
2396
  power->dcache_tagarray = res_memport*ntwl*ntbl*(simple_array_power(trowsb,tcolsb,1,1,cache));
2397

    
2398
  power->dcache_power = power->dcache_decoder + power->dcache_wordline + power->dcache_bitline + power->dcache_senseamp + power->dcache_tagarray;
2399

    
2400
  clockpower = total_clockpower(.018);
2401
  power->clock_power = clockpower;
2402
  if(verbose) {
2403
    fprintf(stderr,"result bus power == %f\n",power->resultbus);
2404
    fprintf(stderr,"global clock power == %f\n",clockpower);
2405
  }
2406

    
2407
  time_parameters.cache_size = cache_dl2->nsets * cache_dl2->bsize * cache_dl2->assoc; /* C */
2408
  time_parameters.block_size = cache_dl2->bsize; /* B */
2409
  time_parameters.associativity = cache_dl2->assoc; /* A */
2410
  time_parameters.number_of_sets = cache_dl2->nsets; /* C/(B*A) */
2411

    
2412
  calculate_time(&time_result,&time_parameters);
2413
  output_data(&time_result,&time_parameters);
2414

    
2415
  ndwl=time_result.best_Ndwl;
2416
  ndbl=time_result.best_Ndbl;
2417
  nspd=time_result.best_Nspd;
2418
  ntwl=time_result.best_Ntwl;
2419
  ntbl=time_result.best_Ntbl;
2420
  ntspd=time_result.best_Ntspd;
2421
  c = time_parameters.cache_size;
2422
  b = time_parameters.block_size;
2423
  a = time_parameters.associativity;
2424

    
2425
  rowsb = c/(b*a*ndbl*nspd);
2426
  colsb = 8*b*a*nspd/ndwl;
2427

    
2428
  tagsize = va_size - ((int)logtwo(cache_dl2->nsets) + (int)logtwo(cache_dl2->bsize));
2429
  trowsb = c/(b*a*ntbl*ntspd);
2430
  tcolsb = a * (tagsize + 1 + 6) * ntspd/ntwl;
2431

    
2432
  if(verbose) {
2433
    fprintf(stderr,"%d KB %d-way cache (%d-byte block size):\n",c,a,b);
2434
    fprintf(stderr,"ndwl == %d, ndbl == %d, nspd == %d\n",ndwl,ndbl,nspd);
2435
    fprintf(stderr,"%d sets of %d rows x %d cols\n",ndwl*ndbl,rowsb,colsb);
2436
    fprintf(stderr,"tagsize == %d\n",tagsize);
2437
  }
2438

    
2439
  predeclength = rowsb * (RegCellHeight + WordlineSpacing);
2440
  wordlinelength = colsb *  (RegCellWidth + BitlineSpacing);
2441
  bitlinelength = rowsb * (RegCellHeight + WordlineSpacing);
2442

    
2443
  if(verbose)
2444
    fprintf(stderr,"dcache2 power stats\n");
2445
  power->dcache2_decoder = array_decoder_power(rowsb,colsb,predeclength,1,1,cache);
2446
  power->dcache2_wordline = array_wordline_power(rowsb,colsb,wordlinelength,1,1,cache);
2447
  power->dcache2_bitline = array_bitline_power(rowsb,colsb,bitlinelength,1,1,cache);
2448
  power->dcache2_senseamp = senseamp_power(colsb);
2449
  power->dcache2_tagarray = simple_array_power(trowsb,tcolsb,1,1,cache);
2450

    
2451
  power->dcache2_power = power->dcache2_decoder + power->dcache2_wordline + power->dcache2_bitline + power->dcache2_senseamp + power->dcache2_tagarray;
2452

    
2453
  power->rat_decoder *= crossover_scaling;
2454
  power->rat_wordline *= crossover_scaling;
2455
  power->rat_bitline *= crossover_scaling;
2456

    
2457
  power->dcl_compare *= crossover_scaling;
2458
  power->dcl_pencode *= crossover_scaling;
2459
  power->inst_decoder_power *= crossover_scaling;
2460
  power->wakeup_tagdrive *= crossover_scaling;
2461
  power->wakeup_tagmatch *= crossover_scaling;
2462
  power->wakeup_ormatch *= crossover_scaling;
2463

    
2464
  power->selection *= crossover_scaling;
2465

    
2466
  power->regfile_decoder *= crossover_scaling;
2467
  power->regfile_wordline *= crossover_scaling;
2468
  power->regfile_bitline *= crossover_scaling;
2469
  power->regfile_senseamp *= crossover_scaling;
2470

    
2471
  power->rs_decoder *= crossover_scaling;
2472
  power->rs_wordline *= crossover_scaling;
2473
  power->rs_bitline *= crossover_scaling;
2474
  power->rs_senseamp *= crossover_scaling;
2475

    
2476
  power->lsq_wakeup_tagdrive *= crossover_scaling;
2477
  power->lsq_wakeup_tagmatch *= crossover_scaling;
2478

    
2479
  power->lsq_rs_decoder *= crossover_scaling;
2480
  power->lsq_rs_wordline *= crossover_scaling;
2481
  power->lsq_rs_bitline *= crossover_scaling;
2482
  power->lsq_rs_senseamp *= crossover_scaling;
2483
 
2484
  power->resultbus *= crossover_scaling;
2485

    
2486
  power->btb *= crossover_scaling;
2487
  power->local_predict *= crossover_scaling;
2488
  power->global_predict *= crossover_scaling;
2489
  power->chooser *= crossover_scaling;
2490

    
2491
  power->dtlb *= crossover_scaling;
2492

    
2493
  power->itlb *= crossover_scaling;
2494

    
2495
  power->icache_decoder *= crossover_scaling;
2496
  power->icache_wordline*= crossover_scaling;
2497
  power->icache_bitline *= crossover_scaling;
2498
  power->icache_senseamp*= crossover_scaling;
2499
  power->icache_tagarray*= crossover_scaling;
2500

    
2501
  power->icache_power *= crossover_scaling;
2502

    
2503
  power->dcache_decoder *= crossover_scaling;
2504
  power->dcache_wordline *= crossover_scaling;
2505
  power->dcache_bitline *= crossover_scaling;
2506
  power->dcache_senseamp *= crossover_scaling;
2507
  power->dcache_tagarray *= crossover_scaling;
2508

    
2509
  power->dcache_power *= crossover_scaling;
2510
  
2511
  power->clock_power *= crossover_scaling;
2512

    
2513
  power->dcache2_decoder *= crossover_scaling;
2514
  power->dcache2_wordline *= crossover_scaling;
2515
  power->dcache2_bitline *= crossover_scaling;
2516
  power->dcache2_senseamp *= crossover_scaling;
2517
  power->dcache2_tagarray *= crossover_scaling;
2518

    
2519
  power->dcache2_power *= crossover_scaling;
2520

    
2521
  power->total_power = power->local_predict + power->global_predict + 
2522
    power->chooser + power->btb +
2523
    power->rat_decoder + power->rat_wordline + 
2524
    power->rat_bitline + power->rat_senseamp + 
2525
    power->dcl_compare + power->dcl_pencode + 
2526
    power->inst_decoder_power +
2527
    power->wakeup_tagdrive + power->wakeup_tagmatch + 
2528
    power->selection +
2529
    power->regfile_decoder + power->regfile_wordline + 
2530
    power->regfile_bitline + power->regfile_senseamp +  
2531
    power->rs_decoder + power->rs_wordline +
2532
    power->rs_bitline + power->rs_senseamp + 
2533
    power->lsq_wakeup_tagdrive + power->lsq_wakeup_tagmatch +
2534
    power->lsq_rs_decoder + power->lsq_rs_wordline +
2535
    power->lsq_rs_bitline + power->lsq_rs_senseamp +
2536
    power->resultbus +
2537
    power->clock_power +
2538
    power->icache_power + 
2539
    power->itlb + 
2540
    power->dcache_power + 
2541
    power->dtlb + 
2542
    power->dcache2_power;
2543

    
2544
  power->total_power_nodcache2 =power->local_predict + power->global_predict + 
2545
    power->chooser + power->btb +
2546
    power->rat_decoder + power->rat_wordline + 
2547
    power->rat_bitline + power->rat_senseamp + 
2548
    power->dcl_compare + power->dcl_pencode + 
2549
    power->inst_decoder_power +
2550
    power->wakeup_tagdrive + power->wakeup_tagmatch + 
2551
    power->selection +
2552
    power->regfile_decoder + power->regfile_wordline + 
2553
    power->regfile_bitline + power->regfile_senseamp +  
2554
    power->rs_decoder + power->rs_wordline +
2555
    power->rs_bitline + power->rs_senseamp + 
2556
    power->lsq_wakeup_tagdrive + power->lsq_wakeup_tagmatch +
2557
    power->lsq_rs_decoder + power->lsq_rs_wordline +
2558
    power->lsq_rs_bitline + power->lsq_rs_senseamp +
2559
    power->resultbus +
2560
    power->clock_power +
2561
    power->icache_power + 
2562
    power->itlb + 
2563
    power->dcache_power + 
2564
    power->dtlb + 
2565
    power->dcache2_power;
2566

    
2567
  power->bpred_power = power->btb + power->local_predict + power->global_predict + power->chooser + power->ras;
2568

    
2569
  power->rat_power = power->rat_decoder + 
2570
    power->rat_wordline + power->rat_bitline + power->rat_senseamp;
2571

    
2572
  power->dcl_power = power->dcl_compare + power->dcl_pencode;
2573

    
2574
  power->rename_power = power->rat_power + 
2575
    power->dcl_power + 
2576
    power->inst_decoder_power;
2577

    
2578
  power->wakeup_power = power->wakeup_tagdrive + power->wakeup_tagmatch + 
2579
    power->wakeup_ormatch;
2580

    
2581
  power->rs_power = power->rs_decoder + 
2582
    power->rs_wordline + power->rs_bitline + power->rs_senseamp;
2583

    
2584
  power->rs_power_nobit = power->rs_decoder + 
2585
    power->rs_wordline + power->rs_senseamp;
2586

    
2587
  power->window_power = power->wakeup_power + power->rs_power + 
2588
    power->selection;
2589

    
2590
  power->lsq_rs_power = power->lsq_rs_decoder + 
2591
    power->lsq_rs_wordline + power->lsq_rs_bitline + 
2592
    power->lsq_rs_senseamp;
2593

    
2594
  power->lsq_rs_power_nobit = power->lsq_rs_decoder + 
2595
    power->lsq_rs_wordline + power->lsq_rs_senseamp;
2596
   
2597
  power->lsq_wakeup_power = power->lsq_wakeup_tagdrive + power->lsq_wakeup_tagmatch;
2598

    
2599
  power->lsq_power = power->lsq_wakeup_power + power->lsq_rs_power;
2600

    
2601
  power->regfile_power = power->regfile_decoder + 
2602
    power->regfile_wordline + power->regfile_bitline + 
2603
    power->regfile_senseamp;
2604

    
2605
  power->regfile_power_nobit = power->regfile_decoder + 
2606
    power->regfile_wordline + power->regfile_senseamp;
2607

    
2608
  dump_power_stats(power);
2609

    
2610
}