Apriori算法的C/C#实现

来源:岁月联盟 编辑:exp 时间:2012-08-25

数据结构的选取,还做得不太好,会继续改进,请大牛多多指点。

之后我会比较C#与C的Apriori程序,总结一些区别,谈谈面向对象编程在这个算法上的体现与数据结构的选择问题。


 001 1 #include <dos.h> 

002   2 #include <conio.h> 

003   3 #include <math.h> 

004   4 #include <stdio.h> 

005   5 #include <stdlib.h> 

006   6  

007   7 #define ItemNumSize 2 

008   8 #define TranNumSize 100 

009   9 #define LISTINCREMENT 1 

010  10 #define OK 1 

011  11 #define TRUE 1 

012  12 #define FASLE 0 

013  13 #define ERROR 0 

014  14 #define MAX_ARRAY_DIM 100 

015  15 #define MAXSIZE  100 

016  16 typedef char ItemType; 

017  17 typedef int ElemType; 

018  18 float  minSupport,minConfidence; 

019  19 //动态内存分配,item用什么数据结构 动态数组,线性表好:数组是整体创建,整体删除的 

020  20 typedef struct  www.2cto.com

021  21 { 

022  22     ItemType *item;//项目 

023  23     int length;//当前项目个数 

024  24     int listsize;//当前分配的存储容量 

025  25 }SqList; 

026  26 //事务数组集合 

027  27 typedef struct

028  28 { 

029  29     SqList r[MAXSIZE+1]; 

030  30     int Length; 

031  31 }TranList; 

032  32  

033  33 //初始化项目的线性表 

034  34 int InitListSq(SqList &L) 

035  35 { 

036  36     L.item=(ItemType * )malloc(ItemNumSize *sizeof(ItemType)); 

037  37     if (!L.item)exit(OVERFLOW);//存储分配失败 

038  38     L.length=0;//空表长度为0 

039  39     L.listsize=ItemNumSize;//初始化存储容量 

040  40     return OK; 

041  41 } 

042  42 //初始化事务的线性表 

043  43 int InitListTran(TranList &TranL)//还有更好的动态分配方式初始化 

044  44 { 

045  45     for (int i=1;i<=TranNumSize;i++) 

046  46     { 

047  47         InitListSq(TranL.r[i]); 

048  48     } 

049  49     return OK; 

050  50 } 

051  51 //插入项目线性表 

052  52 int listInsertSq(SqList &L,int i,ItemType e) 

053  53 { 

054  54     //在线性表L中第i个位置之前插入新元素e 

055  55     //i的合法值为1<=i<=l.listlength+1 

056  56     ItemType *newbase,*q,*p; 

057  57     if(i<1||i>L.length+1)return ERROR;//i值不合法 

058  58     if (L.length>=L.listsize)//当前存储空间已满,添加分配 

059  59     { 

060  60         //重新分配内存空间 

061  61        newbase=(ItemType *)realloc(L.item,(L.listsize+LISTINCREMENT)*sizeof(ItemType)); 

062  62        if (!newbase)exit(OVERFLOW); 

063  63        L.item=newbase;//新基址 

064  64        L.listsize+=LISTINCREMENT;//增加存储容量 

065  65     } 

066  66     q=&(L.item[i-1]);//q为插入位置 

067  67     for(p=&(L.item[L.length-1]);p>=q;--p) 

068  68         *(p+1)=*p;//插入位置,及之后的元素右移 

069  69     *q=e; 

070  70     ++L.length; 

071  71     return OK; 

072  72 } 

073  73 void main() 

074  74 { 

075  75     int c; 

076  76     ItemType e; 

077  77     SqList L; 

078  78     int sn; 

079  79     int ItemNum; //项目个数 

080  80     int trannum[20]={0}; //事务数量 

081  81     char b2[100][10];  

082  82     char b21[100][10];  

083  83     TranList TranL; 

084  84     SqList L1; 

085  85     InitListSq(L); 

086  86     InitListTran(TranL); 

087  87     printf ("链表长度:%d/n", L.length);  // 线性表当前的元素个数 

088  88     printf ("链表大小:%d/n", L.listsize); // 线性表最多可存放元素的个数 

089  89     while (1) 

090  90     { 

091  91         system("cls"); 

092  92         printf_s("/n          Apriori算法的C语言实现/n"); 

093  93         printf_s("           1        输入项目集合/n"); 

094  94         printf_s("           2        添加事务/n"); 

095  95         printf_s("           3        设定最小支持度与最小置信度/n"); 

096  96         printf_s("           4        输出结果/n"); 

097  97         printf_s("           5        退出/n");     

098  98         printf_s("请输入:/n");     

099  99         scanf_s("%d",&c); 

100 100         switch (c) 

101 101         { 

102 102         case 1://构造项目集 

103 103             { 

104 104                 int it; 

105 105                 char ItemValue; 

106 106                 system("cls"); 

107 107                 printf_s("构造项目集/n"); 

108 108                 printf_s("请输入项目个数:/n");//项目个数 

109 109                 scanf_s("%d",&ItemNum); 

110 110                 for (it=1;it<=ItemNum;it++)//依次输入每个项目集 

111 111                 { 

112 112                     fflush(stdin); 

113 113                     printf_s("/n请输入第%d个项目的字母(a,b,c,d,e,f,……):/n",it); 

114 114                     scanf("%c",&ItemValue); 

115 115                     listInsertSq(L,it,ItemValue); 

116 116                 } 

117 117                 printf_s("/n初始化后,项目集各元素值:/n"); 

118 118                 for (int i=0;i<L.length;i++) 

119 119                 { 

120 120                     printf_s("%c/n",L.item[i]); 

121 121                 } 

122 122                 _getch(); 

123 123                 break; 

124 124             } 

125 125         case 2: 

126 126             { 

127 127                 system("cls"); 

128 128                 //事务的数据结构,动态数组 

129 129                 int i,j; 

130 130                 char tranvalue; 

131 131                 printf_s("请输入要添加的事务个数:/n");//事务个数 

132 132                 scanf_s("%d",&sn); 

133 133                 for (i=1;i<=sn;i++)//依次输入每个事务所包含的项目,i应当从0开始 

134 134                 { 

135 135                     printf_s("请输入第%d个事务包含的项目数:",i); 

136 136                     scanf_s("%d",&trannum[i]); 

137 137                     fflush(stdin); 

138 138                     for (j=1;j<=trannum[i];j++) 

139 139                     { 

140 140                         fflush(stdin); 

141 141                         printf_s("输入事务的第%d个项目:/n",j); 

142 142                         scanf_s("%c",&tranvalue); 

143 143                         //动态分配内存,插入事务数组集合 

144 144                         listInsertSq(TranL.r[i],j,tranvalue); 

145 145                     } 

146 146                 } 

147 147                 printf_s("/n各事务的项目如下:/n"); 

148 148                 for (i=1;i<=sn;i++) 

149 149                 { 

150 150                     printf_s("/n第%d个事务/n",i); 

151 151                     for (j=0;j<=trannum[i];j++) 

152 152                     { 

153 153                         printf_s("%c",TranL.r[i].item[j]); 

154 154                     } 

155 155                 } 

156 156                 _getch(); 

157 157                 break; 

158 158             } 

159 159         case 3://设定最小支持度与最小置信度 

160 160             { 

161 161                 system("cls"); 

162 162                 printf_s("请输入最小支持度与最小置信度(空格隔开):"); 

163 163                 fflush(stdin);//最好在每个scanf前加上fflush( stdin ); 

164 164                 scanf_s("%f%f",&minSupport,&minConfidence); 

165 165                 printf_s("/n最小支持度为:%2.2f/n",minSupport); 

166 166                 printf_s("最小置信度为:%2.2f/n",minConfidence); 

167 167                 _getch(); 

168 168                 break; 

169 169             } 

170 170         case 4://Apriori算法 

171 171             { 

172 172                 InitListSq(L1); 

173 173                 char generatedCandidate[10]; 

174 174                 int c[20]={0}; 

175 175                 int f[20]={0}; 

176 176                 int jj=1; 

177 177                 //得到C1,算法第一行 

178 178                 for (int i=0;i<ItemNum;i++)//算法太复杂了,以后改为二叉树 

179 179                 { 

180 180                     for (int j=1;trannum[j]!=0;j++) 

181 181                     { 

182 182                         for (int k=0;TranL.r[j].item[k]!=0;k++) 

183 183                         { 

184 184                             if (L.item[i]==TranL.r[j].item[k]) 

185 185                             { 

186 186                                 c[i]++; 

187 187                             } 

188 188                         } 

189 189                     } 

190 190                     //计算F1支持度 

191 191                     if (c[i]>=minSupport*trannum[i+1])//两个整数相除得到整数 

192 192                     { 

193 193                         f[i]=c[i]; 

194 194                         listInsertSq(L1,jj,L.item[i]);//L1 

195 195                         jj++; 

196 196                     } 

197 197                 } 

198 198                 printf_s("F1集合为:/n"); 

199 199                 int temp1=0; 

200 200                 for (int i=0;i<ItemNum;i++) 

201 201                 { 

202 202                     printf_s("{%c}=%d  ",L.item[i],f[i]); 

203 203                     if ((temp1+1)%3==0) 

204 204                         printf_s("/n"); 

205 205                     temp1++; 

206 206                 } 

207 207                 printf_s("/n"); 

208 208                 //排序TranL.r[j].item[k] 

209 209                 int t; 

210 210                 for (int i=1;i<=sn;i++)//每个事务 

211 211                 { 

212 212                     for (int j=0;j<trannum[j+1];j++)//每个项目 

213 213                     { 

214 214                         if (TranL.r[i].item[j]>TranL.r[i].item[j+1]) 

215 215                         { 

216 216                             t=TranL.r[i].item[j]; 

217 217                             TranL.r[i].item[j]=TranL.r[i].item[j+1]; 

218 218                             TranL.r[i].item[j]=t; 

219 219                         } 

220 220                     } 

221 221                 } 

222 222                 //GenerateCandidates函数 

223 223                 int j1; 

224 224                 j1=L1.length; 

225 225                 //把L1->b2[i][] 

226 226                 for (int i=0;i<j1;i++) 

227 227                 { 

228 228                     b2[i][0]=L1.item[i]; 

229 229                 } 

230 230                 int kk=0; 

231 231                 for (int i=0;i<L1.length;i++) 

232 232                 { 

233 233                     generatedCandidate[kk]=L1.item[i]; 

234 234                     kk++; 

235 235                     for (int j=i+1;j<L1.length;j++) 

236 236                     { 

237 237                         generatedCandidate[kk]=L1.item[i+1]; 

238 238                         if (generatedCandidate!=0) 

239 239                         { 

240 240                             char temp; 

241 241                             //排序 

242 242                             for (int i=0;generatedCandidate[i+1]!=0;i++) 

243 243                             { 

244 244                                 if (generatedCandidate[i]>generatedCandidate[i+1]) 

245 245                                 { 

246 246                                     temp=generatedCandidate[i]; 

247 247                                     generatedCandidate[i]=generatedCandidate[i+1]; 

248 248                                     generatedCandidate[i+1]=temp; 

249 249                                 } 

250 250                             } 

251 251                         } 

252 252                     } 

253 253                 } 

254 254                 int u=0; 

255 255                 int v=1;//用V来进行输出各种组合的标识数V=1表示正在进行输出 

256 256                 int c2[100]={0}; 

257 257                 int flag1=1; 

258 258                 int counter=0; 

259 259                 int temp; 

260 260                 //getsupport 

261 261                 for (int k=2;b2[0][0]!='/0';k++) 

262 262                 { 

263 263                     u=0;v=1; 

264 264                     for (int i=0;i<100;i++) 

265 265                     { 

266 266                         c2[i]=0; 

267 267                     } 

268 268                     for (int i=0;i<j1;i++) 

269 269                     { 

270 270                         for (int i1=i+1;i1<j1;i1++) 

271 271                         { 

272 272                             for (int j=0;j<k-2;j++) 

273 273                             { 

274 274                                 if (b2[i][j]!=b2[i1][j]) 

275 275                                 { 

276 276                                     flag1=0; 

277 277                                     break; 

278 278                                 } 

279 279                             } 

280 280                             //进行组合的部分 

281 281                             if (flag1==1&&b2[i][k-2]!=b2[i1][k-2]) 

282 282                             { 

283 283                                 for (int j2=0;j2<k-1;j2++) 

284 284                                 { 

285 285                                     b21[u][j2]=b2[i][j2]; 

286 286                                 } 

287 287                                 b21[u][k-1]=b2[i1][k-2]; 

288 288                                 u++; 

289 289                             } 

290 290                             flag1=1; 

291 291                         } 

292 292                     } 

293 293                     counter=0; 

294 294                     for (int i=0;i<sn;i++) 

295 295                     { 

296 296                         for (int i1=0;i1<u;i1++) 

297 297                         { 

298 298                             for (int j1=0;j1<k;j1++) 

299 299                             { 

300 300                                 for (int j=0;TranL.r[i+1].item[j]!='/0';j++) 

301 301                                 { 

302 302                                     if (TranL.r[i+1].item[j]==b21[i1][j1]) 

303 303                                         counter++; 

304 304                                 } 

305 305                             } 

306 306                             if (counter==k) 

307 307                                 c2[i1]++; 

308 308                             counter=0; 

309 309                         } 

310 310                     } 

311 311                     j1=0; 

312 312                     temp=0; 

313 313                     //对U中情况进行选择,选出支持度计数大于2的 

314 314                     for (int i=0;i<u;i++) 

315 315                     { 

316 316                         if (c2[i]>=minSupport) 

317 317                         { 

318 318                             if (v==1) 

319 319                             { 

320 320                                 printf_s("/nF%d集合为:/n",k); 

321 321                                 v=0; 

322 322                             } 

323 323                             printf_s("{"); 

324 324                             for (int j=0;j<k;j++) 

325 325                             { 

326 326                                 b2[j1][j]=b21[i][j]; 

327 327                                 printf_s("%c,",b2[j1][j]); 

328 328                             } 

329 329                             j1++; 

330 330                             printf_s("/b}"); 

331 331                             printf_s("=%d  ",c2[i]); 

332 332                             if ((temp+1)%3==0) 

333 333                                 printf_s("/n"); 

334 334                             temp++; 

335 335                         } 

336 336                     } 

337 337                     b2[j1][0]='/0'; 

338 338                     if (b2[0][0]!='0') 

339 339                     { 

340 340                         printf_s("/b /n"); 

341 341                     } 

342 342                 } 

343 343                 _getch(); 

344 344                 break; 

345 345             } 

346 346         case 5: 

347 347             { 

348 348                 return; 

349 349                 _getch(); 

350 350                 system("cls"); 

351 351                 break; 

352 352             } 

353 353         default: 

354 354             { 

355 355                 printf_s("输入有误请重新输入!/n"); 

356 356                 _getch(); 

357 357                 system("cls"); 

358 358             } 

359 359         } 

360 360  

361 361     } 

362 362 }