/*************************************************
**                                              **
**                  例子数据集                    **
**                                              **
*************************************************/

/* Scores for one class of five students: name, sex, math score,
   Chinese score, and a derived average (avg). */
data samp.c9501;
  /* name comes from fixed columns 1-10; the other fields are list input. */
  input name $ 1-10
        sex $
        math
        chinese;
  /* Equal-weight average of math and the Chinese score rescaled by
     100/120 (presumably a 120-point scale -- confirm). */
  avg = math*0.5 + chinese/120*100*0.5;
  datalines;
李明      男 92 98
张红艺    女 89 106
王思明    男 86 90
张聪      男 98 109
刘颍      女 80 110
;
run;
/* Copy c9501 into WORK so that later steps can use the one-level name.
   The source library is SAMP, which is where the DATA step above writes
   samp.c9501 (the original read from SASUSER, which only works when SAMP
   happens to point at the same location). */
proc datasets;
    copy out=work in=samp;
    select c9501;
quit;

/* Split c9501 by sex: '男' rows go to c9501m, '女' rows to c9501f.
   Any other value is reported in the log and written to neither table.
   The sex column is dropped from both outputs. */
data c9501m(drop=sex) c9501f(drop=sex);
  set c9501;
  if sex = '男' then output c9501m;
  else if sex = '女' then output c9501f;
  else put sex= '有错';
run;
/* Projection of c9501 that keeps only name and sex. */
data c9501x(keep=name sex);
  set c9501;
run;
/* Projection of c9501 that keeps only name and the two raw scores. */
data c9501y(keep=name math chinese);
  set c9501;
run;
/* One amount per student, keyed by name (variables: name, amount). */
data bkmoney;
  /* Declare name as $10 so it has the same length as name in c9501 (read
     from columns 1-10). List input alone would give it the default length
     of 8, and a MERGE by name against c9501 would then warn about
     multiple lengths for the BY variable. */
  length name $ 10;
  input name $  amount;
  cards;
李明  20
张红艺 15
王思明 10
张聪 20
刘颍 50
;
run;


/* Demo data for clustering and for group markers/colours in INSIGHT.
   X, Y, Z are interval variables; G labels three groups ('a', 'b', 'c')
   of 50 rows each, each group drawn around a different centre:
     a: ( 0,  0,  0)   b: ( 3,  4, -3)   c: ( 3, -4, -3)
*/
data samp.clus;
  do g = 'a', 'b', 'c';
    do i = 1 to 50;
      select (g);
        when ('a') do;
          x = normal(0);
          y = normal(0);
          z = normal(0);
        end;
        when ('b') do;
          x = 3 + normal(0);
          y = 4 + normal(0);
          z = -3 + normal(0);
        end;
        otherwise do;
          x = 3 + normal(0);
          y = -4 + normal(0);
          z = -3 + normal(0);
        end;
      end;
      output;
    end;
  end;
  drop i;
run;


/* A dataset with many rows: 10000 observations of x1-x10, each filled
   with a normal(0) draw. */
data huge(drop=i j);
  array x{10} x1-x10;
  do i = 1 to 10000;
    do j = 1 to dim(x);
      x{j} = normal(0);
    end;
    output;
  end;
run;


/* Payroll example: employee number (IdNumber), sex (Sex), job code
 * (Jobcode), salary (Salary), birth date (Birth), hire date (Hired).
 * Suitable for SQL examples.
 */
data payroll;
   /* Fixed-column record: every field is read from an explicit start column. */
   input @1  IdNumber $4.
         @6  Sex      $1.
         @8  Jobcode  $3.
         @12 Salary   5.
         @18 Birth    date7.
         @26 Hired    date7.;
   format Birth Hired mmddyy8.;
   cards;
1009 M TA1 28880 02MAR59 26MAR92
1017 M TA3 40858 28DEC57 16OCT81
1036 F TA3 39392 19MAY65 23OCT84
1037 F TA1 28558 10APR64 13SEP92
1038 F TA1 26533 09NOV69 23NOV91
1050 M ME2 35167 14JUL63 24AUG86
1065 M ME2 35090 26JAN44 07JAN87
1076 M PT1 66558 14OCT55 03OCT91
1094 M FA1 22268 02APR70 17APR91
1100 M BCK 25004 01DEC60 07MAY88
;
run;

/* OilProd: crude oil production, with Country and barrels produced
   per day (BarrelsPerDay). */
data OilProd;
   /* Country occupies columns 1-24; the comma-formatted figure starts at 26. */
   input @1  Country       $24.
         @26 BarrelsPerDay comma9.;
   format BarrelsPerDay comma9.;
   cards;
Algeria                  1,400,000
Canada                   2,500,000
China                    3,000,000
Egypt                      900,000
Indonesia                1,500,000
Iran                     4,000,000
Iraq                       600,000
Kuwait                   2,500,000
Libya                    1,500,000
Mexico                   3,400,000
Nigeria                  2,000,000
Norway                   3,500,000
Oman                       900,000
Saudi Arabia             9,000,000
United States of America 8,000,000
United Arab Emirates     2,000,000
United Kingdom           3,000,000
Venezuela                3,000,000
USSR (former)            7,000,000
;
run;

/* OilRsrvs: crude oil reserves, with Country and reserves in barrels
   (Barrels).
   NOTE(review): 'USSR (Former)' here is capitalised differently from
   'USSR (former)' in OilProd, so a case-sensitive match on Country will
   not pair those two rows -- confirm whether that is intended. */
data OilRsrvs;
   /* Country occupies columns 1-24; the comma-formatted figure starts at 26. */
   input @1  Country $24.
         @26 Barrels comma15.;
   format Barrels comma15.;
   cards;
Algeria                    9,200,000,000
Canada                     7,000,000,000
China                     25,000,000,000
Egypt                      4,000,000,000
Gabon                      1,000,000,000
Indonesia                  5,000,000,000
Iran                      90,000,000,000
Iraq                     110,000,000,000
Kuwait                    95,000,000,000
Libya                     30,000,000,000
Mexico                    50,000,000,000
Nigeria                   16,000,000,000
Norway                    11,000,000,000
Saudi Arabia             260,000,000,000
United Arab Emirates         100,000,000
United Kingdom             4,500,000,000
United States of America  30,000,000,000
Venezuela                 65,000,000,000
USSR (Former)             65,500,000,000
;
run;


/* WorldCityCoords: coordinates of major world cities. Variables:
 * City (city name), Country (country), Latitude, Longitude.
 */
data worldcitycoords;
   /* Fixed-column record: every field is read from an explicit start column. */
   input @1  City      $25.
         @28 Country   $19.
         @48 Latitude  3.
         @53 Longitude 4.;
   cards;
Kabul                      Afghanistan          35    69
Algiers                    Algeria              37     3
Buenos Aires               Argentina           -34   -59
Cordoba                    Argentina           -31   -64
Tucuman                    Argentina           -27   -65
Adelaide                   Australia           -35   138
Alice Springs              Australia           -24   134
Brisbane                   Australia           -27   153
Darwin                     Australia           -12   131
Melbourne                  Australia           -38   145
Perth                      Australia           -32   116
Sydney                     Australia           -34   151
Vienna                     Austria              48    16
Nassau                     Bahamas              26   -77
Chittagong                 Bangladesh           22    92
Brussels                   Belgium              51     4
Belize                     Belize               17   -88
Kindley AFB                Bermuda              33   -65
La Paz                     Bolivia             -16   -69
Belem                      Brazil               -1   -48
Belo Horizonte             Brazil              -20   -44
Brasilia                   Brazil              -16   -48
Curitiba                   Brazil              -25   -49
Fortaleza                  Brazil               -4   -38
Porto Alegre               Brazil              -30   -51
Recife                     Brazil               -9   -35
Rio de Janeiro             Brazil              -23   -43
Salvador                   Brazil              -13   -38
Sao Paulo                  Brazil              -23   -46
Sofia                      Bulgaria             43    23
Phnom Penh                 Cambodia             11   105
Calgary                    Canada               51  -114
Havre                      Canada               48  -110
Kingston                   Canada               44   -76
London                     Canada               43   -81
Moose Jaw                  Canada               50  -105
Montreal                   Canada               45   -73
Ottawa                     Canada               45   -76
Port Arthur                Canada               48   -89
Quebec                     Canada               47   -71
St. John                   Canada               45   -66
Toronto                    Canada               44   -79
Victoria                   Canada               48  -123
Winnipeg                   Canada               50   -98
Punta Arenas               Chile               -53   -71
Santiago                   Chile               -33   -71
Valparaiso                 Chile               -33   -71
Chongquing                 China                29   106
Shanghai                   China                31   121
Baranquilla                Colombia             11   -75
Bogota                     Colombia              4   -75
Cali                       Colombia              3   -76
Medellin                   Colombia              6   -75
Brazzaville                Congo                -4    15
Guantanamo Bay             Cuba                 20   -76
Havana                     Cuba                 24   -82
Prague                     Czech Republic       51    14
Copenhagen                 Denmark              56    12
Santo Domingo              Dominican Republic   18   -70
Cairo                      Egypt                30    31
San Salvador               El Salvador          14   -89
Guayaquil                  Ecuador             -21   -80
Quito                      Ecuador               0   -78
Addis Ababa                Ethiopia              9    39
Asmara                     Ethiopia             15    39
Helsinki                   Finland              60    25
Lyon                       France               46     5
Marseilles                 France               43     5
Nantes                     France               47    -1
Nice                       France               44     7
Paris                      France               49     2
Strasbourg                 France               48     8
Cayenne                    French Guiana         5   -52
Berlin                     Germany              52    13
Hamburg                    Germany              53    10
Hannover                   Germany              52    10
Mannheim                   Germany              49     8
Munich                     Germany              49    11
Accra                      Ghana                 5     0
Gibraltar                  Gibraltar            37    -5
Athens                     Greece               38    24
Thessaloniki               Greece               40    23
Guatemala City             Guatemala            14   -90
Georgetown                 Guyana                7   -58
Port Au Prince             Haiti                18   -72
Tegucigalpa                Honduras             15   -87
Hong Kong                  Hong Kong            22   114
Budapest                   Hungary              47    19
Reykjavik                  Iceland              65    22
Ahmenabad                  India                22    72
Bangalore                  India                13    77
Bombay                     India                19    73
Calcutta                   India                22    88
Madras                     India                14    80
Nagpur                     India                22    80
New Delhi                  India                28    77
Djakarta                   Indonesia            -6   107
Kupang                     Indonesia           -10   123
Makassar                   Indonesia            -6   119
Medan                      Indonesia             3    99
Palembang                  Indonesia            -3   105
Surabaya                   Indonesia            -7   113
Abadan                     Iran                 30    48
Meshed                     Iran                 36    59
Tehran                     Iran                 36    51
Baghdad                    Iraq                 33    44
Mosul                      Iraq                 36    44
Dublin                     Ireland              53    -6
Shannon                    Ireland              53    -9
Jerusalem                  Israel               32    35
Tel Aviv                   Israel               33    35
Milan                      Italy                45     9
Naples                     Italy                41    14
Rome                       Italy                42    12
Fukuoka                    Japan                33   130
Sapporo                    Japan                44   141
Tokyo                      Japan                36   140
Amman                      Jordan               32    36
Nairobi                    Kenya                -1    37
Pyongyang                  Korea, North         39   126
Seoul                      Korea, South         37   127
Beirut                     Lebanon              34    35
Monrovia                   Liberia               6   -11
Benghazi                   Libya                33    21
Tananarive                 Madagascar          -19    47
Kuala Lumpur               Malaysia              4   102
Penang                     Malaysia              5   100
Guadalajara                Mexico               21  -103
Merida                     Mexico               21   -89
Mexico City                Mexico               19   -99
Monterrey                  Mexico               26  -100
Vera Cruz                  Mexico               19   -97
Casablanca                 Morocco              33    -7
Katmandu                   Nepal                28    85
Amsterdam                  Netherlands          52     5
Auckland                   New Zealand         -37   175
Christchurch               New Zealand         -43   172
Wellington                 New Zealand         -41   175
Managua                    Nicaragua            12   -86
Lagos                      Nigeria               6     3
Bergen                     Norway               60     5
Oslo                       Norway               60    11
Karachi                    Pakistan             25    67
Lahore                     Pakistan             31    74
Peshwar                    Pakistan             34    71
Panama City                Panama                9   -79
Port Moresby               Papua New Guinea     -9   148
Ascuncion                  Paraguay            -25   -57
Lima                       Peru                -13   -77
Manila                     Philippines          14   121
Krakow                     Poland               51    20
Warsaw                     Poland               52    21
Lisbon                     Portugal             39   -10
San Juan                   Puerto Rico          18   -67
Bucharest                  Romania              44    27
Kiev                       Russia               50    30
Leningrad                  Russia               60    30
Minsk                      Russia               54    27
Moscow                     Russia               56    38
Odessa                     Russia               46    31
Tashkent                   Russia               41    69
Tbilisi                    Russia               42    45
Vladivostok                Russia               44   132
Volgograd                  Russia               49    44
Dhahran                    Saudi Arabia         26    51
Jedda                      Saudi Arabia         21    39
Riyadh                     Saudi Arabia         24    47
Dakar                      Senegal              15   -17
Singapore                  Singapore             1   104
Mogadiscio                 Somalia               2    49
Cape Town                  South Africa        -34    18
Johannesburg               South Africa        -26    28
Pretoria                   South Africa        -26    28
Aden                       Yemen                13    45
Barcelona                  Spain                41     3
Madrid                     Spain                40    -4
Valencia                   Spain                39     0
Colombo                    Sri Lanka             7    80
Khartoum                   Sudan                15    32
Paramaribo                 Suriname              6   -56
Stockholm                  Sweden               59    19
Zurich                     Switzerland          47     8
Damascus                   Syria                33    36
Tainan                     Taiwan               23   120
Taipei                     Taiwan               25   121
Dar es Salaam              Tanzania             -7    39
Bangkok                    Thailand             14   100
Port of Spain              Trinidad and Tobago  11   -61
Tunis                      Tunisia              37    10
Adana                      Turkey               37    35
Ankara                     Turkey               40    33
Istanbul                   Turkey               41    29
Izmir                      Turkey               38    27
Belfast                    Northern Ireland     54    -6
Birmingham                 England              52    -2
Cardiff                    Wales                51    -3
Edinburgh                  Scotland             56    -3
Glasgow                    Scotland             56    -4
London                     England              51     0
Montevideo                 Uruguay             -35   -56
Caracas                    Venezuela            10   -67
Maracaibo                  Venezuela            10   -71
Da Nang                    Vietnam              17   108
Hanoi                      Vietnam              21   106
Ho Chi Minh City (Saigon)  Vietnam              11   107
Belgrade                   Yugoslavia           45    20
Acapulco                   Mexico               17  -100
Beijing                    China                40   116
San Jose                   Costa Rica           10   -85
Hamilton                   Bermuda              32   -65
Vancouver                  Canada               49  -124
Kingston                   Jamaica              18   -77
;
run;

/* Countries数据集: 各国家基本情况数据。
 * 包括Name(国家名称)、Capital(首都)、Population(人口数)、
 * Area(面积)、Continent(所在洲)、UNDate(加入联合国年)。
 */
data Countries;                                                                                                 
   input Name $ 1-35 Capital $ 37-55 Population 57-65
         Area 67-75 Continent $ 77-107 UNDate 109-112;
   datalines;
Afghanistan                         Kabul                17070323    251825 Asia                            1946
Albania                             Tirane                3407400     11100 Europe                          1955
Algeria                             Algiers              28171132    919595 Africa                          1962
Andorra                             Andorra la Vella        64634       200 Europe                          1993
Angola                              Luanda                9901050    481300 Africa                          1976
Antigua and Barbuda                 St. John's              65644       171 Central America and Caribbean   1981
Argentina                           Buenos Aires         34248705   1073518 South America                   1945
Armenia                             Yerevan               3556864     11500 Asia                            1992
Australia                           Canberra             18255944   2966200 Australia                       1945
Austria                             Vienna                8033746     32400 Europe                          1955
Azerbaijan                          Baku                  7760064     33400 Asia                            1992
Bahamas                             Nassau                 275703      5400 Central America and Caribbean   1973
Bahrain                             Manama                 591800       300 Asia                            1971
Bangladesh                          Dhaka                1.2639E8     57300 Asia                            1974
Barbados                            Bridgetown             258534       200 Central America and Caribbean   1966
Belarus                             Minsk                10508000     80100 Europe                          1945
Belgium                             Brussels             10162614     11800 Europe                          1945
Belize                              Belmopan               211069      8900 Central America and Caribbean   1981
Benin                               Porto Novo            5394881     43500 Africa                          1960
Bermuda                             Hamilton                60594       100                                    .
Bhutan                              Thimphu               1756214     18100 Asia                            1971
Bolivia                             La Paz                7795410    424200 South America                   1945
Bosnia and Herzegovina              Sarajevo              4697040     19700 Europe                          1992
Botswana                            Gaborone              1372453    224600 Africa                          1966
Brazil                              Brasilia             1.6031E8   3286500 South America                   1945
Brunei                              Bandar Seri Begawan    287822      2200 Asia                            1984
Bulgaria                            Sofia                 8887111     42900 Europe                          1955
Burkina Faso                        Ouagodougou          10235326    105900 Africa                          1960
Burundi                             Bujumbura             6185632     10700 Africa                          1962
Cambodia                            Phnom Penh           10366614     70200 Asia                            1955
Cameroon                            Yaounde              13261994    183600 Africa                          1960
Canada                              Ottawa               28392302   3849674 North America                   1945
Cape Verde                          Praia                  427188      1600 Africa                             .
Cayman Islands                      Georgetown              23228       100 Central America and Caribbean      .
Central African  Republic           Bangui                3173103    240300 Africa                          1960
Chad                                N'Djamena             5521118    495800 Africa                          1960
Channel Islands                                            146436       100 Europe                             .
Chile                               Santiago             14089101    292100 South America                   1945
China                               Beijing              1.2022E9   3696100 Asia                            1945
Colombia                            Bogota               35930188    440800 South America                   1945
Comoros                             Moroni                 535246       700 Africa                          1975
Congo                               Brazzaville           2471223    132000 Africa                          1960
Congo, Democratic Republic of       Kinshasa             43106529    905400 Africa                          1960
Costa Rica                          San Jose              3375083     19700 Central America and Caribbean   1945
Cote D'Ivoire                       Yamoussoukro         14437516    124500 Africa                          1960
Croatia                             Zagreb                4744505     21800 Europe                          1992
Cuba                                Havana               11173523     42800 Central America and Caribbean   1945
Cyprus                              Nicosia                737226      3600 Asia                            1960
Czech Republic                      Prague               10511029     30400 Europe                          1993
Denmark                             Copenhagen            5239356     16600 Europe                          1945
Djibouti                            Djibouti               417089      8900 Africa                          1977
Dominica                            Roseau                  88871       300 Central America and Caribbean   1978
Dominican Republic                  Santo Domingo         7903469     18700 Central America and Caribbean   1945
Ecuador                             Quito                10782691    105000 South America                   1945
Egypt                               Cairo                59912259    385200 Africa                          1945
El Salvador                         San Salvador          5809949      8100 Central America and Caribbean   1945
England                             London               49293170     50400 Europe                          1945
Equatorial Guinea                   Malabo                 414059     10800 Africa                          1968
Eritrea                             Asmera                3231677     45300 Africa                          1993
Estonia                             Tallinn               1633006     17400 Europe                          1991
Ethiopia                            Addis Ababa          59291170    437800 Africa                          1945
Fiji                                Suva                   771563      7100 Oceania                         1970
Finland                             Helsinki              5119178    130600 Europe                          1955
France                              Paris                58412558    210000 Europe                          1945
French Guiana                       Cayenne                102000     43700 South America                      .
Gabon                               Libreville            1150275    103300 Africa                          1960
Gambia (The)                        Banjul                 968493      4100 Africa                          1965
Georgia, Republic of                Tbilisi               5737236     26900 Asia                            1992
Germany                             Berlin               81890690    137700 Europe                          1973
Ghana                               Accra                17395511     92100 Africa                          1957
Gibraltar                           Gibraltar               30297       100 Europe                             .
Greece                              Athens               10669583     51000 Europe                          1945
Grenada                             St. George's            94931       100 Central America and Caribbean   1974
Guatemala                           Guatemala City       10827127     42000 Central America and Caribbean   1945
Guinea                              Conakry               6455275     94900 Africa                          1958
Guinea-Bissau                       Bissau                1108869     13900 Africa                          1974
Guyana                              Georgetown             736216     83000 South America                   1966
Haiti                               Port-au-Prince        6555255     10700 Central America and Caribbean   1945
Honduras                            Tegucigalpa           5367613     43300 Central America and Caribbean   1945
Hong Kong                           Victoria              5857414       400 Asia                               .
Hungary                             Budapest             10421148     35900 Europe                          1955
Iceland                             Reykjavik              266614     36700                                 1946
India                               New Delhi            9.2901E8   1222600 Asia                            1945
Indonesia                           Jakarta              2.0239E8    741100 Asia                            1950
Iran                                Tehran               66261493    632500 Asia                            1945
Iraq                                Baghdad              20086891    168000 Asia                            1945
Ireland                             Dublin                3574032     27100 Europe                          1955
Isle of Man                         Douglas                 70693       200 Europe                             .
Israel                              Jerusalem             5101000      8000 Asia                            1949
Italy                               Rome                 58713508    116300 Europe                          1955
Jamaica                             Kingston              2580291      4200 Central America and Caribbean   1962
Japan                               Tokyo                1.2635E8    145900 Asia                            1956
Jordan                              Amman                 4000210     34300 Asia                            1955
Kalaallit Nunaat                    Nuuk                    57564    840000                                    .
Kazakhstan                          Almaty               17438936   1049200 Asia                            1992
Kenya                               Nairobi              28520558    225000 Africa                          1963
Kiribati                            Tarawa                  78772       300 Oceania                            .
Korea, North                        Pyongyang            23295340     47400 Asia                            1991
Korea, South                        Seoul                45529277     38300 Asia                            1991
Kuwait                              Kuwait City           1837006      6900 Asia                            1963
Kyrgyzstan                          Bishkek               4744505     76600 Asia                            1992
Laos                                Vientiane             4748545     91400 Asia                            1955
Latvia                              Riga                  2776212     24900 Europe                          1991
Lebanon                             Beirut                3655834      3900 Asia                            1945
Leeward Islands                     Plymouth                12119       100 Central America and Caribbean      .
Lesotho                             Maseru                1963244     11700 Africa                          1966
Liberia                             Monrovia              3002430     38200 Africa                          1945
Libya                               Tripoli               5107059    679400 Africa                          1955
Liechtenstein                       Vaduz                   30297       100 Europe                          1990
Lithuania                           Vilnius               3886091     25200 Europe                          1991
Luxembourg                          Luxembourg             405980       100 Europe                          1945
Macedonia                           Skopje                2235917      9900 Europe                          1993
Madagascar                          Antananarivo         13560924    226700 Africa                          1960
Malawi                              Lilongwe              9828337     45700 Africa                          1964
Malaysia                            Kuala Lumpur         19473883    127600 Asia                            1957
Maldives                            Male                   254495       100 Asia                            1965
Mali                                Bamako                9203210    482100 Africa                          1960
Malta                               Valletta               370633       100 Europe                          1964
Marshall Islands                    Majuro                  54535       100 Oceania                         1991
Mauritania                          Nouakchott            2214709    398000 Africa                          1961
Mauritius                           Port Louis            1128057      1000 Africa                          1968
Mexico                              Mexico City          93114708    756100 North America                   1945
Micronesia                          Palikir                121188       300 Oceania                         1991
Moldova                             Chisinau              4517279     13000 Europe                          1992
Monaco                              Monaco                  31307       100 Europe                          1993
Mongolia                            Ulaan Baatar          2454055    604800 Asia                            1961
Montenegro                          Titograd               626137      5300 Europe                             .
Morocco                             Rabat                28841705    177100 Africa                          1956
Mozambique                          Maputo               17517708    313700 Africa                          1975
Myanmar                             Yangon               44715298    261200 Asia                            1948
Namibia                             Windhoek              1611798    318100 Africa                          1990
Nauru                               Yaren                   10099       100 Oceania                            .
Nepal                               Kathmandu            21250295     56800 Asia                            1955
Netherlands                         Amsterdam            15538306     16000 Europe                          1945
Netherlands Antilles                Willemstad             185822       400 Central America and Caribbean      .
New Zealand                         Wellington            3422548    104500 Oceania                         1945
Nicaragua                           Managua               4137556     50900 Central America and Caribbean   1945
Niger                               Niamey                8720477    497000 Africa                          1960
Nigeria                             Abuja                99062003    356700 Africa                          1960
Northern Ireland                    Belfast               1585541      5500 Europe                             .
Norway                              Oslo                  4357714    125100 Europe                          1945
Oman                                Muscat                1717838    118200 Asia                            1971
Pakistan                            Islamabad            1.2306E8    339700 Asia                            1947
Panama                              Panama City           2656034     29200 Central America and Caribbean   1945
Papua New Guinea                    Port Moresby          4238546    178700 Asia                            1975
Paraguay                            Asuncion              5265614    157000 South America                   1945
Peru                                Lima                 23885121    496200 South America                   1945
Philippines                         Manila               70500039    115900 Asia                            1945
Poland                              Warsaw               39037645    120700 Europe                          1945
Portugal                            Lisbon               10628177     35700 Europe                          1955
Puerto Rico                         San Juan              3556864      3492 Central America and Caribbean      .
Qatar                               Doha                   518078      4400 Asia                            1971
Romania                             Bucharest            23410469     91700 Europe                          1955
Russia                              Moscow               1.5109E8   6592800 Europe                          1945
Rwanda                              Kigali                8456895     10200 Africa                          1962
Saint Kitts and Nevis               Basseterre              41406       100 Central America and Caribbean   1983
Saint Lucia                         Castries               146436       200 Central America and Caribbean   1979
Saint Vincent and the Grenadines    Kingstown              116138       200 Central America and Caribbean   1980
San Marino                          San Marino              24238       100 Europe                          1992
Sao Tome and Principe               Sao Tome               138356       400 Africa                          1975
Saudi Arabia                        Riyadh               18377132    865000 Asia                            1945
Scotland                            Edinburgh             5006069     30400 Europe                             .
Senegal                             Dakar                 8817428     76000 Africa                          1960
Serbia                              Belgrade              9755624     34100 Europe                             .
Seychelles                          Victoria                72713       200 Africa                          1976
Sierra Leone                        Freetown              4675832     27200 Africa                          1961
Singapore                           Singapore             2887301       200 Asia                            1965
Slovakia                            Bratislava            5457495     18900 Europe                          1993
Slovenia                            Ljubljana             1991521      7800 Europe                          1992
Solomon Islands                     Honiara                389821     11000 Oceania                         1978
Somalia                             Mogadishu             6732996    246300 Africa                          1960
South Africa                        Cape Town            44365873    473300 Africa                          1945
Spain                               Madrid               39692061    194900 Europe                          1955
Sri Lanka                           Colombo              18211509     25300 Asia                            1955
Sudan                               Khartoum             29711229    966800 Africa                          1956
Suriname                            Paramaribo             427188     63300 South America                   1975
Swaziland                           Mbabane                945265      6700 Africa                          1968
Sweden                              Stockholm             8864893    173700 Europe                          1946
Switzerland                         Bern                  7109689     15900 Europe                             .
Syria                               Damascus             15034366     71500 Asia                            1945
Taiwan                              Taipei               21509839     14000 Asia                               .
Tajikistan                          Dushanbe              6054344     55300 Asia                            1992
Tanzania                            Dar-es-Salaam        28263033     36400 Africa                          1961
Thailand                            Bangkok              60099089    198100 Asia                            1946
Togo                                Lome                  4297120     21900 Africa                          1960
Tonga                               Nuku'alofa             106040       300 Oceania                            .
Trinidad and Tobago                 Port of Spain         1341146      2000 Central America and Caribbean   1962
Tunisia                             Tunis                 8813388     63400 Africa                          1956
Turkey                              Ankara               62769263    300948 Europe                          1945
Turkmenistan                        Ashgabat              4034546    188400 Asia                            1992
Turks and Caicos Islands            Grand Turk              12119       200 Central America and Caribbean      .
Tuvalu                              Funafuti                10099       100 Oceania                            .
Uganda                              Kampala              20055584     93100 Africa                          1962
Ukraine                             Kiev                 52360233    233100 Europe                          1945
United Arab Emirates                Abu Dhabi             2818628     30000 Asia                            1971
United States                       Washington           2.6329E8   3787318 North America                   1945
Uruguay                             Montevideo            3230667     68000 South America                   1945
Uzbekistan                          Tashkent             22832806    172700 Asia                            1992
Vanuatu                             Vila                   171683      4700 Oceania                         1981
Vatican City                        Vatican City             1010         2 Europe                             .
Venezuela                           Caracas              20765543    352100 South America                   1945
Vietnam                             Hanoi                73827657    127200 Asia                            1977
Wales                               Cardiff               2825697      8000 Europe                             .
Western Samoa                       Apia                   206020      1100 Oceania                         1976
Yemen                               Sanaa                11214929    205300 Asia                            1947
Yugoslavia                          Belgrade             10866513     39400 Europe                          1945
Zambia                              Lusaka                9278952    290600 Africa                          1964
Zimbabwe                            Harare               11083641    150900 Africa                          1980
;
run;
*';

/* EXPREV: expense and revenue figures for several regions.
 * Variables: Region, State, Month (month/year stored as a SAS date),
 * Expenses and Revenues.
 */
DATA exprev;
   /* Region and State are read with list input; Month is read with the
      MONYY5. informat from the position reached after State. */
   INPUT Region $
         State $
         Month monyy5.
         Expenses
         Revenues;
   FORMAT month monyy5.;   /* print the date value back as e.g. JAN95 */
   CARDS;
Southern GA JAN95 2000  8000
Southern GA FEB95 1200  6000
Southern FL FEB95 8500 11000
Northern NY FEB95 3000  4000
Northern NY MAR95 6000  5000
Southern FL MAR95 9800 13500
Northern MA MAR95 1500  1000
;
RUN;


/* PILOTS: pilot roster.
 * Variables: ID, LastName, FirstName, City, State, Gender,
 * JobCode (job type code) and Salary.
 * NOTE(review): ID 1333 appears on two rows (BLAIR and NEWKIRK) --
 * confirm whether the duplicate is intentional.
 */
DATA pilots;
   INFILE datalines;
   /* Fixed-column layout for the character fields; Salary is read with
      list input starting at column 49. */
   INPUT ID        $  1-4
         LastName  $  6-15
         FirstName $ 17-25
         City      $ 27-38
         State     $ 40-41
         Gender    $ 43
         JobCode   $ 45-47
         @49 Salary @56;
   CARDS;
1333 BLAIR      JUSTIN    STAMFORD     CT M PT2  88606
1739 BOYCE      JONATHAN  NEW YORK     NY M PT1  66517
1428 BRADY      CHRISTINE STAMFORD     CT F PT1  68767
1404 CARTER     DONALD    NEW YORK     NY M PT2  91376
1118 DENNIS     ROGER     NEW YORK     NY M PT3 111379
1905 GRAHAM     ALVIN     NEW YORK     NY M PT1  65111
1407 GRANT      DANIEL    MT. VERNON   NY M PT1  68096
1410 HARRIS     CHARLES   STAMFORD     CT M PT2  84685
1439 HARRISON   FELICIA   BRIDGEPORT   CT F PT1  70736
1545 HUNTER     CLYDE     STAMFORD     CT M PT1  66130
1777 LUFKIN     ROY       NEW YORK     NY M PT3 109630
1106 MARSHBURN  JASPER    STAMFORD     CT M PT2  89632
1333 NEWKIRK    SANDRA    PRINCETON    NJ F PT2  84536
1478 NEWTON     JAMES     NEW YORK     NY M PT2  84203
1556 PENNINGTON MICHAEL   NEW YORK     NY M PT1  71349
1890 STEPHENSON ROBERT    NEW YORK     NY M PT2  85896
1107 THOMPSON   WAYNE     NEW YORK     NY M PT2  89977
1830 TRIPP      KATHY     BRIDGEPORT   CT F PT2  84471
1928 UPCHURCH   LARRY     WHITE PLAINS NY M PT2  89858
1076 VENTER     RANDALL   NEW YORK     NY M PT1  66558
;
RUN;


/* WGHTCLUB: weight-loss club data.
 * Variables: idno (id number), name, team, strtwght (starting weight),
 * endwght (weight after the programme) and loss (= strtwght - endwght,
 * computed in the step).
 */
data wghtclub;
   /* idno and name are read from fixed columns; the remaining fields
      follow with list input. */
   input idno 1-4 name $ 6-24 team $ strtwght endwght;
   loss = strtwght - endwght;
   datalines;
1023 David Shaw         red 189 165
1049 Amelia Serrano     yellow 145 124
1219 Alan Nance         red 210 192
1246 Ravi Sinha         yellow 194 177
1078 Ashley McKnight    red 127 118
;
run;



/* PATIENTS: basic patient information.
 * Variables: ID, Name, Sex, Age, Date (admission date), Height,
 * Weight, ActLevel (activity level) and Fee.
 */
DATA patients;
   /* Every field is read from fixed columns. */
   INPUT ID       $  1-4
         Name     $  6-16
         Sex      $ 18
         Age        20-21
         Date       23-24
         Height     26-27
         Weight     29-31
         ActLevel $ 33-36
         Fee        38-43;
   FORMAT fee 6.2;
   CARDS;
2458 Murray, W   M 27  1 72 168 HIGH  85.20
2462 Almers, C   F 34  3 66 152 HIGH 124.80
2523 Johnson, R  F 43 31 63 137 MOD  149.75
2539 LaMance, K  M 51  4 71 158 LOW  124.80
2544 Jones, M    M 29  6 76 193 HIGH 124.80
2552 Reberson, P F 32  9 67 151 MOD  149.75
2555 King, E     M 35 13 70 173 MOD  149.75
2563 Pitts, D    M 34 22 73 154 LOW  124.80
2571 Nunnelly, A F 44 19 66 140 HIGH 149.75
2572 Oberon, M   F 28 17 62 118 LOW   85.20
2574 Peterson, V M 30  6 69 147 MOD  149.75
2575 Quigley, M  F 40  8 69 163 HIGH 124.80
2578 Cameron, L  M 47  5 72 173 MOD  124.80
2586 Derber, B   M 25 23 75 188 HIGH  85.20
2588 Ivan, H     F 22 20 63 139 LOW   85.20
2589 Wilcox, E   F 41 16 67 141 HIGH 149.75
2595 Warren, C   M 54  7 71 183 MOD  149.75
;
RUN;


/* MAT: a 4 x 3 matrix used to demonstrate transposition. */
DATA mat;
  INPUT x1 x2 x3;
  DATALINES;
1   2  3
4   5  6
7   8  9
10 11 12
;
RUN;

/* ONECOL: data used to demonstrate merging rows. Each patient (num)
 * has two rows, one with test='a' and one with test='b'; val holds
 * the drug-effect value.
 */
DATA onecol;
  INPUT num test $ val;
  DATALINES;
1 a 11
2 a 12
3 a 13
1 b 21
2 b 22
3 b 23
;
RUN;


/* TWOCOL: data used to demonstrate splitting rows. num is the patient
 * number; test1 and test2 are the effects of drug A and drug B.
 */
DATA twocol;
  INPUT num test1 test2;
  DATALINES;
1 11 21
2 12 22
3 13 23
;
RUN;


/* COLOR: eye colour by hair colour counts for two regions. */
DATA color;
   /* The trailing @@ holds the line so that several observations
      (three per line here) are read from each record. */
   INPUT Region Eyes $ Hair $ Count @@;
   LABEL eyes   = 'Eye Color'
         hair   = 'Hair Color'
         region = 'Geographic Region';
   CARDS;
1 blue  fair    23  1 blue  red      7  1 blue  medium  24
1 blue  dark    11  1 green fair    19  1 green red      7
1 green medium  18  1 green dark    14  1 brown fair    34
1 brown red      5  1 brown medium  41  1 brown dark    40
1 brown black    3  2 blue  fair    46  2 blue  red     21
2 blue  medium  44  2 blue  dark    40  2 blue  black    6
2 green fair    50  2 green red     31  2 green medium  37
2 green dark    23  2 brown fair    56  2 brown red     42
2 brown medium  53  2 brown dark    54  2 brown black   13
;
RUN;


/* SAMP.TAXIF: taxi receipt frequency table. amount is the fare and
 * num is the number of receipts with that fare.
 */
DATA samp.taxif;
  /* @@ lets each record supply several (amount, num) pairs. */
  INPUT amount num @@;
  DATALINES;
10 4 12 6 13 1 15 1
16 1 19 5 20 3 23 1
24 1 25 1 26 3 27 1
32 1 47 1 48 2 49 1
52 1 55 1 58 1 81 1
;
RUN;


/* DNORM2: bivariate normal density evaluated on a grid.
 * X has variance 1, Y has variance a, and their correlation is r, so
 * the covariance matrix is [1, r*sqrt(a); r*sqrt(a), a] with
 * determinant det = a*(1 - r**2). The density is
 *   z = 1/(2*pi*sqrt(det)) * exp(-0.5 * q),
 *   q = (a*x**2 + y**2 - 2*r*sqrt(a)*x*y) / det.
 * The normalising constant uses sqrt(det); dividing by det itself would
 * give a surface that does not integrate to one.
 * Output variables: x, y, z.
 */
data dnorm2;
  a   = 2;                 /* Var(Y) */
  a2  = sqrt(a);           /* SD(Y) */
  r   = 0.5;               /* correlation of X and Y */
  det = a*(1 - r*r);       /* determinant of the covariance matrix */
  nconst = 1/(2*constant('pi')*sqrt(det));   /* normalising constant */
  /* Grid covers +/- 3 standard deviations in each direction. */
  do x = -3 to 3 by 0.3;
    do y = -3*a2 to 3*a2 by 0.3*a2;
      z = nconst*exp(-0.5/det*
          (a*x*x + y*y - 2*r*a2*x*y));
      output;
    end;
  end;
  keep x y z;
run;


/* STOCKS: yearly Dow Jones data.
 * Variables: Year, DateOfHigh (date of the yearly high), DowJonesHigh
 * (yearly high), DateOfLow (date of the yearly low), DowJonesLow
 * (yearly low); LogDowHigh and LogDowLow are the natural logarithms
 * of the high and low.
 */
DATA stocks;
   /* Dates are read with the DATE9. informat (colon modifier) starting
      at columns 7 and 26. */
   INPUT Year
         @7  DateOfHigh : date9.
             DowJonesHigh
         @26 DateOfLow  : date9.
             DowJonesLow;
   FORMAT LogDowHigh LogDowLow 5.2
          DateOfHigh DateOfLow date9.;
   LogDowHigh = log(DowJonesHigh);
   LogDowLow  = log(DowJonesLow);
   CARDS;
1954  31DEC1954  404.39  11JAN1954  279.87
1955  30DEC1955  488.40  17JAN1955  388.20
1956  06APR1956  521.05  23JAN1956  462.35
1957  12JUL1957  520.77  22OCT1957  419.79
1958  31DEC1958  583.65  25FEB1958  436.89
1959  31DEC1959  679.36  09FEB1959  574.46
1960  05JAN1960  685.47  25OCT1960  568.05
1961  13DEC1961  734.91  03JAN1961  610.25
1962  03JAN1962  726.01  26JUN1962  535.76
1963  18DEC1963  767.21  02JAN1963  646.79
1964  18NOV1964  891.71  02JAN1964  768.08
1965  31DEC1965  969.26  28JUN1965  840.59
1966  09FEB1966  995.15  07OCT1966  744.32
1967  25SEP1967  943.08  03JAN1967  786.41
1968  03DEC1968  985.21  21MAR1968  825.13
1969  14MAY1969  968.85  17DEC1969  769.93
1970  29DEC1970  842.00  06MAY1970  631.16
1971  28APR1971  950.82  23NOV1971  797.97
1972  11DEC1972 1036.27  26JAN1972  889.15
1973  11JAN1973 1051.70  05DEC1973  788.31
1974  13MAR1974  891.66  06DEC1974  577.60
1975  15JUL1975  881.81  02JAN1975  632.04
1976  21SEP1976 1014.79  02JAN1976  858.71
1977  03JAN1977  999.75  02NOV1977  800.85
1978  08SEP1978  907.74  28FEB1978  742.12
1979  05OCT1979  897.61  07NOV1979  796.67
1980  20NOV1980 1000.17  21APR1980  759.13
1981  27APR1981 1024.05  25SEP1981  824.01
1982  27DEC1982 1070.55  12AUG1982  776.92
1983  29NOV1983 1287.20  03JAN1983 1027.04
1984  06JAN1984 1286.64  24JUL1984 1086.57
1985  16DEC1985 1553.10  04JAN1985 1184.96
1986  02DEC1986 1955.57  22JAN1986 1502.29
1987  25AUG1987 2722.42  19OCT1987 1738.74
1988  21OCT1988 2183.50  20JAN1988 1879.14
1989  09OCT1989 2791.41  03JAN1989 2144.64
1990  16JUL1990 2999.75  11OCT1990 2365.10
1991  31DEC1991 3168.83  09JAN1991 2470.30
1992  01JUN1992 3413.21  09OCT1992 3136.58
1993  29DEC1993 3794.33  20JAN1993 3241.95
1994  31JAN1994 3978.36  04APR1994 3593.35
1995  13DEC1995 5216.47  30JAN1995 3832.08
1996  27DEC1996 6560.91  10JAN1996 5032.94
1997  06AUG1997 8259.31  11APR1997 6391.69
1998  23NOV1998 9374.27  31AUG1998 7539.07
;
RUN;



/* SAMP.FITNESS2: fitness data -- Age, Sex, HeartRate, Exercise and
 * Aerobic. A lone period in the data marks a missing value.
 */
DATA samp.fitness2;
   INPUT Age
         Sex $
         HeartRate
         Exercise
         Aerobic;
   CARDS;
28  M  86  2   36.6
41  M  76  3   26.7
30  M  78  2   33.8
39  F  90  1   13.6
28  M  96  1   33.
26  M  74  2   42.7
 .  F  66  4   36.1
48  F  72  2   22.6
31  M  60  3   44.1
28  F  84  2   22.1
33  F  56  4   21.3
37  F  78  2   30.3
46  M  84  1   34.2
23  M  72  2   38.1
25  F  88  1   32.0
37  F  72  2   43.7
42  M  60  3   36.7
44  F  78  3   21.6
 .  F  70  1   22.8
25  F  60  3   36.1
24  F  74  2   29.9
29  F  66  4   38.9
27  M  62  4   44.0
24  M  72  3   44.2
36  F  80  1   26.2
24  M  82  2   18.7
23  M  54  3   70.6
28  F  76  1   23.8
30  F  66  2   28.9
25  M  54  3   41.3
48  F  72  2   28.9
23  F  68  1   18.9
22  F  78  2   39.0
23  F  66  3   36.1
46  F  54  3   28.9
31  F  84  1   21.6
45  M  60  2   47.8
27  M  90  2   43.1
26  M  66  2   28.9
26  F  84  2     .
24  M  72  3   50.1
32  F  72  1   15.7
29  M  54  3   44.8
48  F  66  2   28.9
36  F  66  2   33.2
;
RUN;

/* JOBS: average salary by type of engineer.
 * eng: kind of engineer; dollars: salary; num: head count.
 */
DATA jobs;
   LENGTH eng $5;           /* declared first, so eng is character */
   INPUT eng dollars num;
   CARDS;
Civil 27308 73273
Aero  29844 70192
Elec  22920 89382
Mech  32816 19601
Chem  28116 25541
Petro 18444 34833
;
RUN;


/* SAMP.STATS: height and weight measurements. */
DATA samp.stats;
   INPUT height weight;
   CARDS;
69.0  112.5
56.5   84.0
65.3   98.0
62.8  102.5
63.5  102.5
57.3   83.0
59.8   84.5
62.5  112.5
62.5   84.0
59.0   99.5
51.3   50.5
64.3   90.0
56.3   77.0
66.5  112.0
72.0  150.0
64.8  128.0
67.0  133.0
57.5   85.0
;
RUN;


/* CITYTEMP: average temperature by city.
 * month: month number; faren: temperature; city: city name.
 */
DATA citytemp;
   INPUT month faren city $;
   CARDS;
   1      40.5    Raleigh
   1      12.2    Minn
   1      52.1    Phoenix
   2      42.2    Raleigh
   2      16.5    Minn
   2      55.1    Phoenix
   3      49.2    Raleigh
   3      28.3    Minn
   3      59.7    Phoenix
   4      59.5    Raleigh
   4      45.1    Minn
   4      67.7    Phoenix
   5      67.4    Raleigh
   5      57.1    Minn
   5      76.3    Phoenix
   6      74.4    Raleigh
   6      66.9    Minn
   6      84.6    Phoenix
   7      77.5    Raleigh
   7      71.9    Minn
   7      91.2    Phoenix
   8      76.5    Raleigh
   8      70.2    Minn
   8      89.1    Phoenix
   9      70.6    Raleigh
   9      60.0    Minn
   9      83.8    Phoenix
  10      60.2    Raleigh
  10      50.0    Minn
  10      72.2    Phoenix
  11      50.0    Raleigh
  11      32.4    Minn
  11      59.8    Phoenix
  12      41.2    Raleigh
  12      18.6    Minn
  12      52.5    Phoenix
;
RUN;


/* TOTALS: sales data. dept: department; site: city; quarter: quarter;
 * sales: sales amount.
 */
DATA totals;
   LENGTH dept $ 7
          site $ 8;
   INPUT dept site quarter sales;
   CARDS;
Parts   Sydney  1 4043.97
Parts   Atlanta 1 6225.26
Parts   Paris   1 3543.97
Repairs Sydney  1 5592.82
Repairs Atlanta 1 9210.21
Repairs Paris   1 8591.98
Tools   Sydney  1 1775.74
Tools   Atlanta 1 2424.19
Tools   Paris   1 5914.25
Parts   Sydney  2 3723.44
Parts   Atlanta 2 11595.07
Parts   Paris   2 9558.29
Repairs Sydney  2 5505.31
Repairs Atlanta 2 4589.59
Repairs Paris   2 7538.56
Tools   Sydney  2 2945.17
Tools   Atlanta 2 1903.99
Tools   Paris   2 7868.34
Parts   Sydney  3 8437.96
Parts   Atlanta 3 6847.91
Parts   Paris   3 6789.85
Repairs Sydney  3 4426.46
Repairs Atlanta 3 5011.66
Repairs Paris   3 6510.38
Tools   Sydney  3 3767.10
Tools   Atlanta 3 3048.52
Tools   Paris   3 9017.96
Parts   Sydney  4 6065.57
Parts   Atlanta 4 9388.51
Parts   Paris   4 8509.08
Repairs Sydney  4 3012.99
Repairs Atlanta 4 2088.30
Repairs Paris   4 5530.37
Tools   Sydney  4 3817.36
Tools   Atlanta 4 4354.18
Tools   Paris   4 6511.70
;
RUN;



/* ENPROD: energy production. year: year; engytype: energy type;
 * produced: amount produced.
 */
DATA enprod;
   /* Formatted input anchored at columns 1, 6 and 16. */
   INPUT @1  year     4.
         @6  engytype $8.
         @16 produced 5.2;
   CARDS;
1985 Coal      19.33
1985 Gas       19.22
1985 Petro     18.99
1985 Nuclear    4.15
1985 Hydro      2.97
1985 Geotherm    .20
1985 Biofuels    .01
1995 Coal      21.98
1995 Gas       21.54
1995 Petro     13.89
1995 Nuclear    7.18
1995 Hydro      3.21
1995 Geotherm    .31
1995 Biofuels   2.95
;
RUN;


/* REJECTS: rejected parts. site: city; date: date; badparts: number
 * of rejected parts.
 */
DATA rejects;
   INFORMAT date date9.;    /* date is read as a SAS date value */
   INPUT site $ date badparts;
   CARDS;
Sydney  01JAN1997 22
Sydney  01FEB1997 26
Sydney  01MAR1997 14
Sydney  01APR1997 18
Sydney  01MAY1997 28
Sydney  01JUN1997 22
Sydney  01JUL1997 15
Sydney  01AUG1997 18
Sydney  01SEP1997 19
Sydney  01OCT1997 15
Sydney  01NOV1997 31
Sydney  01DEC1997 23
Atlanta 01JAN1997 18
Atlanta 01FEB1997 22
Atlanta 01MAR1997 20
Atlanta 01APR1997 23
Atlanta 01MAY1997 10
Atlanta 01JUN1997 21
Atlanta 01JUL1997 29
Atlanta 01AUG1997 20
Atlanta 01SEP1997 17
Atlanta 01OCT1997 14
Atlanta 01NOV1997 16
Atlanta 01DEC1997 18
Paris   01JAN1997 13
Paris   01FEB1997 18
Paris   01MAR1997 27
Paris   01APR1997 29
Paris   01MAY1997 26
Paris   01JUN1997 20
Paris   01JUL1997 28
Paris   01AUG1997 21
Paris   01SEP1997 12
Paris   01OCT1997  8
Paris   01NOV1997 12
Paris   01DEC1997 19
;
RUN;

/* CAKE: custom cake data.
 *   LastName: family name. Age: age. PresentScore: presentation score.
 *   TasteScore: taste score. Flavor: flavour. Layers: number of layers.
 */
DATA cake;
   /* Column input; a blank Flavor field or a period for Layers is
      read as a missing value. */
   INPUT LastName     $  1-12
         Age            13-14
         PresentScore   16-17
         TasteScore     19-20
         Flavor       $ 23-32
         Layers         34;
   CARDS;
Orlando     27 93 80  Vanilla    1
Ramey       32 84 72  Rum        2
Goldston    46 68 75  Vanilla    1
Roe         38 79 73  Vanilla    2
Larsen      23 77 84  Chocolate  .
Davis       51 86 91  Spice      3
Strickland  19 82 79  Chocolate  1
Nguyen      57 77 84  Vanilla    .
Hildenbrand 33 81 83  Chocolate  1
Byron       62 72 87  Vanilla    2
Sanders     26 56 79  Chocolate  1
Jaeger      43 66 74             1
Davis       28 69 75  Chocolate  2
Conrad      69 85 94  Vanilla    1
Walters     55 67 72  Chocolate  2
Rossburger  28 78 81  Spice      2
Matthew     42 81 92  Chocolate  2
Becker      36 62 83  Spice      2
Anderson    27 87 85  Chocolate  1
Merritt     62 73 84  Chocolate  1
;
RUN;


/* GRADE: student score example with Name, Gender, Status, Year,
 * Section, Score and FinalGrade.
 */
DATA grade;
   /* All fields are read from fixed columns. */
   INPUT Name       $  1-8
         Gender     $ 11
         Status     $13
         Year       $ 15-16
         Section    $ 18
         Score        20-21
         FinalGrade   23-24;
   CARDS;
Abbott    F 2 97 A 90 87
Branford  M 1 98 A 92 97
Crandell  M 2 98 B 81 71
Dennison  M 1 97 A 85 72
Edgar     F 1 98 B 89 80
Faust     M 1 97 B 78 73
Greeley   F 2 97 A 82 91
Hart      F 1 98 B 84 80
Isley     M 2 97 A 88 86
Jasper    M 1 97 B 91 93
;
RUN;


**************************************************;
**************************************************;
**************************************************;
**************************************************;
**************************************************;

/*************************************************
**                                              **
**                  第一章                       **
**                                              **
*************************************************/

/* Chapter 1 example: build the class score table, print it, then sort
   by descending average and print again. */
TITLE '95级1班学生成绩排名';
DATA c9501;
  INPUT name $ 1-10 sex $ math chinese;
  /* Average of math and Chinese, with Chinese rescaled from 120 to 100. */
  avg = math*0.5 + chinese/120*100*0.5;
  DATALINES;
李明      男 92 98
张红艺    女 89 106
王思明    男 86 90
张聪      男 98 109
刘颍      女 80 110
;
RUN;

/* Listing in input order. */
PROC PRINT data=c9501;
RUN;

PROC SORT data=c9501;
  BY DESCENDING avg;
RUN;

/* Listing in rank order. */
PROC PRINT data=c9501;
RUN;

/* Use a LIBNAME statement to assign the library reference (libref)
   MYLIB to the directory given by the quoted drive-letter path. */
libname mylib "c:\y1995";


/******* Macro: standard errors of skewness and kurtosis ********
 * Parameters:
 *   data     - input data set name
 *   variable - analysis variable
 * Runs PROC MEANS to obtain N, skewness and kurtosis (saved in and
 * printed from data set OUTMEANS), then writes to the log:
 *   SE(skew) = sqrt( 6n(n-1) / ((n-2)(n+1)(n+3)) )
 *   SE(kurt) = 2 * SE(skew) * sqrt( (n**2 - 1) / ((n-3)(n+5)) )
 * and the z scores skew/SE(skew) and kurtosis/SE(kurt).
 * The formulas need n > 3.
 ****************************************************************/
%macro seskewkurt(data, variable);

proc means data = &data n skew kurtosis;
  var &variable;
  output out=outmeans n=n skew=skew kurtosis=kurtosis;
run;

proc print data = outmeans;
run;

/* Copy the statistics into macro variables. CALL SYMPUTX converts the
   numeric values without a conversion note and strips blanks. */
data _null_;
  set outmeans;
  call symputx('getn', n);
  call symputx('getkurtosis', kurtosis);
  call symputx('getskew', skew);
run;

%let seskew=%sysevalf((((6*&getn)*(&getn-1))/((&getn-2)*(&getn+1)*(&getn+3)))**0.5);
/* numerator is n**2 - 1 */
%let sekurt=%sysevalf(2*&seskew*((&getn**2-1)/((&getn-3)*(&getn+5)))**0.5) ;
%let zkurt = %sysevalf(&getkurtosis/&sekurt);
%let zskew = %sysevalf(&getskew/&seskew);
%put N is &getn ;
%put Skew is &getskew;
%put SE of skew is &seskew ;
%put Z score of skew is &zskew ;
%put Kurtosis is &getkurtosis ;
%put SE of kurtosis is &sekurt ;
%put Z score of Kurtosis is &zkurt ;

%mend;

/* Sample call of the macro. NOTE(review): "dataset" and "variable"
   look like placeholders -- no data set named DATASET is created in
   the visible part of this file; substitute real names before running. */
%seskewkurt(dataset, variable);









/*************************************************
**                                              **
**                  第二章                       **
**                                              **
*************************************************/

**************************************************;
**************************************************;
**************************************************;
**************************************************;
**************************************************;
**************************************************;

/***************************/
/* §2.2 SAS用作一般高级语言 */
/***************************/

/* PUT statement: compute sin(0.5) and write it to the log. */
DATA;
  x = 0.5;
  y = sin(x);
  PUT "sin(" x ")=" y;
RUN;

/* Build a simple table in the log with PUT statements:
   a two-line header before the first row, then one formatted line
   per observation of SAMP.CLASS. */
DATA _NULL_;
  SET samp.class;
  /* list-output alternative:  put name sex age height weight;  */
  IF _n_ = 1 THEN DO;
    PUT 'Name    Sex Age Height Weight'
      / '-----------------------------';
  END;
  PUT name   $10.
      sex    $2.
      age    3.
      height 6.1
      weight 7.1;
RUN;

/* Write each observation of SAMP.CLASS as a block of labelled lines,
   one field per line, followed by a blank line. */
DATA _NULL_;
  SET samp.class;
  PUT '姓名: ' name;
  PUT '性别: ' sex;
  PUT '年龄: ' age;
  PUT '身高: ' height 6.1;
  PUT '体重: ' weight 6.1;
  PUT;                        /* blank separator line */
RUN;

/* Write a table of squares (1..100) to sq.txt with FILE and PUT. */
DATA _NULL_;
  FILE 'sq.txt';
  PUT '  x     y';            /* header line */
  DO n = 1 TO 100;
    y = n*n;
    PUT n 3. y 6.;
  END;
RUN;

/* Write the table of squares as CSV: DLM=',' on the FILE statement
   makes list-style PUT separate values with commas. */
DATA _NULL_;
  FILE 'sq.csv' DLM=',';
  PUT 'x,y';                  /* header line */
  DO n = 1 TO 100;
    y = n*n;
    PUT n y;
  END;
RUN;

/* Write the table of squares as CSV by putting a literal comma
   between the values, which allows output formats to be used. */
DATA _NULL_;
  FILE 'sq.csv' DLM=',';
  PUT 'x,y';                  /* header line */
  DO n = 1 TO 100;
    y = n*n;
    PUT n 4. ',' y 8.;
  END;
RUN;

/* Example: export SAMP.C9501 to a CSV file using FILE with DLM=','.
   With this approach the variables in the PUT statement cannot carry
   output formats. */
DATA _NULL_;
  SET samp.c9501;
  FILE 'c9501new.csv' DLM=',';
  /* header row, written once */
  IF _n_ = 1 THEN PUT 'name,sex,math,chinese,avg';
  PUT name sex math chinese avg;
RUN;

      
/* Example: counting loops. */

/* Ascending: i = 1, 3, ..., 19; print i and its cube. */
DATA;
  DO i = 1 TO 20 BY 2;
    j = i*i*i;
    PUT i 3. j 5.;
  END;
RUN;

/* Descending: i = 19, 17, ..., 1; print i and its cube. */
DATA;
  DO i = 19 TO 1 BY -2;
    j = i*i*i;
    PUT i 3. j 5.;
  END;
RUN;

/* Example: DO loop over an explicit list of values. */
DATA;
  DO mon = 'Jan', 'Feb', 'Mar';
    PUT mon $4. ' is spring.';
  END;
RUN;


/* Example: primality test with a DO WHILE loop.
   i ends up as the smallest factor of x greater than 1 (i = x when x
   is prime). The divisor 2 is checked separately and odd divisors are
   tried only up to sqrt(x), so the loop always terminates, including
   for even x such as 4 or 8. Intended for integers x >= 2. */
data;
  x=1333333;
  *x=28338943;
  if mod(x,2) = 0 then i = 2;
  else do;
    i = 3;
    DO WHILE (i*i <= x and mod(x,i) ^= 0);
      i = i + 2;
    END;
    /* no divisor found up to sqrt(x): x is prime */
    if i*i > x then i = x;
  end;
  if i<x then put x '不是素数';
  else  put  x  '是素数';
run;

/* Example: primality test with a DO WHILE loop that also prints a
   factorisation x = f1 X f2 when x is composite.
   i ends up as the smallest factor of x greater than 1 (i = x when x
   is prime). The divisor 2 is checked separately and odd divisors are
   tried only up to sqrt(x), so the loop always terminates, including
   for even x. Intended for integers x >= 2. */
data;
  x=1333333;
  if mod(x,2) = 0 then i = 2;
  else do;
    i = 3;
    DO WHILE (i*i <= x and mod(x,i) NE 0);
      i = i + 2;
    END;
    /* no divisor found up to sqrt(x): x is prime */
    if i*i > x then i = x;
  end;
  if i<x then DO;
    f1 = i;        /* smallest factor */
    f2 = x/i;      /* cofactor */
    put x '=' f1 'X ' f2;
    END;
  else put  x  '是素数';
run;
/* 思考题：求x的素因子分解。
   提示：用两重循环。
 */


/* Bisection root finder.
   Solves x^3 - 3 x^2 - x + 3 = 0 on the bracket [-1.5, -0.5],
   halving the bracket until it is narrower than 1E-6, then prints
   the last midpoint. */
DATA _NULL_;
  a = -1.5;
  b = -0.5;
  /* function values at the two ends of the bracket */
  x  = a;
  fa = x**3 - 3*x**2 - x + 3;
  x  = b;
  fb = x**3 - 3*x**2 - x + 3;

  DO UNTIL (b - a < 1E-6);
    c  = (a + b)/2;
    x  = c;
    fc = x**3 - 3*x**2 - x + 3;
    /* debugging aid:  put c= fc=;  */
    IF fa*fc > 0 THEN DO;
      /* no sign change on [a, c]: root is in the right half */
      a  = c;
      fa = fc;
    END;
    ELSE DO;
      /* sign change (or exact zero) on [a, c]: keep the left half */
      b  = c;
      fb = fc;
    END;
  END;
  PUT 'x = ' c;
RUN;


/* DO UNTIL demo: the condition is tested at the bottom of the loop,
   so n= is printed for 1 through 5. */
DATA;
  n = 0;
  DO UNTIL (n >= 5);
    n = n + 1;
    PUT n=;
  END;
RUN;

/* DO loop with a mixed specification: the single values 3 and 7,
   then 11 to 17 by 3 for as long as i**2 < 200. */
DATA;
  DO i = 3, 7, 11 TO 17 BY 3 WHILE (i**2 < 200);
    j = i**2;
    PUT i j;
  END;
RUN;

/* Arrays: replacing a missing-value code.
   Listing options for this example, then the raw data in which 999
   stands for "missing".
   NOTE(review): each data line carries eight values but INPUT names
   only seven variables, so the last value on each line is not read --
   confirm whether an eighth variable was intended. */
OPTIONS formdlim = '*' nodate pageno = 1 ls=66 ps=50;

DATA one;
 INPUT a $ b c d e f g;
 DATALINES;
 999  23 17581 0.0023 126 85 06 13
 joe  54 34634 0.0018 165 30 15 12
 bill 36 70451 0.0020 134 62 09 14
 mary 999 52740 0.0017 148 59 999 16
 bob  47 999   0.0016 153 999 05 999
 jack 62 83598 0.0019 142 76 12 18
 ;
RUN;

/* Replace the code 999 by a SAS missing value in the six numeric
   variables, using an explicitly listed array. */
DATA two;
  SET one;
  ARRAY x(6) b c d e f g;
  DO i = 1 TO 6;
    IF x(i) = 999 THEN x(i) = .;
  END;
  DROP i;
RUN;

PROC PRINT data=two;
RUN;

/* Same replacement, but the array covers every numeric variable
   (_NUMERIC_) and its size is taken from DIM(). */
DATA three;
  SET one;
  ARRAY x(*) _numeric_;
  DO i = 1 TO dim(x);
    IF x(i) = 999 THEN x(i) = .;
  END;
  DROP i;
RUN;

PROC PRINT data=three;
RUN;

/* Arrays: statistics within one row.
   Counts, per observation, how many of gender1-gender5 equal 1
   (males) and how many equal 2 (females). */
DATA counting;
  INPUT gender1 - gender5;
  ARRAY gender(5) gender1-gender5;
  males   = 0;
  females = 0;
  DO i = 1 TO 5;
    /* a true comparison contributes 1, a false one 0 */
    males   = males   + (gender{i} = 1);
    females = females + (gender{i} = 2);
  END;
  DROP i;
  DATALINES;
 1 2 1 1 1
 2 1 2 2 1
 2 2 2 1 1
 ;
PROC PRINT data=counting;
RUN;

/* Arrays: sorting within one row.
   ONE holds four values per row; TWO adds x1-x4, the same values in
   ascending order, obtained with ORDINAL(i, of y1-y4) (the i-th
   smallest argument). */
data one;
 input y1-y4;
 cards;
 15 36 27 4
 6 128 36 52
 14 29 54 43
 ;
data two;
  set one;
  array x(4);
    do i = 1 to 4;
      x(i) = ordinal(i, of y1-y4);
    end;
  drop i;
run;
/* Print the data set built by this example. */
proc print data = two;
run;

/* Two-dimensional arrays: a matrix stored in one row is handled
   through a 2-D array. Simulates 100 random 2x2 matrices whose
   entries are independent standard normal draws and computes the
   determinant of each. */
DATA ranmat;
  ARRAY mat(2,2) a11 a12 a21 a22;
  n = 100;
  DO iobs = 1 TO n;
    /* fill the matrix row by row */
    DO i = 1 TO 2;
      DO j = 1 TO 2;
        mat(i,j) = normal(333555);
      END;
    END;
    det = a11*a22 - a12*a21;
    OUTPUT;
  END;
RUN;

/* Generate ten 3x3 matrices of uniform random numbers, one matrix per
   observation in x1-x9 (the trace is computed in a later step). */
DATA aa;
  ARRAY x(3,3);
  DROP i j;
  DO n = 1 TO 10;
    DO i = 1 TO 3;
      DO j = 1 TO 3;
        x(i,j) = uniform(0);
      END;
    END;
    OUTPUT;
  END;
RUN;
PROC PRINT data=aa;
RUN;

/* Compute the trace t (sum of the diagonal elements) of each 3x3
   matrix stored in AA. */
DATA bb;
  SET aa;
  ARRAY x(3,3);
  t = x(1,1) + x(2,2) + x(3,3);
RUN;
PROC PRINT data=bb;
RUN;


/* Sum sixteen variables through an array whose size is obtained with
   DIM(). The CARDS block is empty, so no observations are read. */
DATA sales;
  INPUT comp1-comp10 prin1-prin6;
  ARRAY y(*) comp1-comp10 prin1-prin6;
  tot = 0;
  DO i = 1 TO dim(y);
    tot + y(i);              /* sum statement */
  END;
  CARDS;
;
RUN;


/* String functions: TRIM.
   Concatenates two length-8 strings with and without trimming the
   first one. */
DATA _NULL_;
  LENGTH s1 s2 $ 8;
  s1 = 'abcd';
  s2 = 'xyz';
  r1 = s1 || s2;            /* keeps the padding of s1 */
  r2 = trim(s1) || s2;      /* padding of s1 removed */
  PUT r1= r2=;
RUN;

/* String functions: COMPBL (collapse runs of blanks to one blank). */
DATA _NULL_;
  FILE print;
  s  = 'A brown   fox is  running. ';
  s2 = compbl(s);
  PUT s=;
  PUT s2=;
RUN;

/* String functions: COMPRESS.
   Removing embedded blanks makes s2 compare equal to s1; c1 and c2
   hold the results of the two comparisons. */
DATA _NULL_;
  FILE print;
  s1 = '李明';
  s2 = '李  明';
  s3 = compress(s2);
  c1 = (s1 = s2);
  c2 = (s1 = s3);
  PUT s1= s2= s3= c1= c2=;
RUN;
/* COMPRESS with a character list: s2 drops the listed punctuation,
   s3 uses the 'K' modifier to keep only the listed digits. */
DATA _NULL_;
  FILE print;
  s1 = '(852)1234-5678';
  s2 = compress(s1, '()-');
  s3 = compress(s1, '0123456789', 'K');
  PUT s1= s2= s3=;
RUN;
/* COMPRESS results wrapped in '#' markers so that any remaining
   blanks are visible in the log. */
DATA _NULL_;
  /* default: remove blanks */
  s0 = '  12   5678  ';
  s1 = '#' || compress(s0) || '#';
  PUT s1=;
  /* remove the listed symbols only */
  s0 = ' A@*l^%a>din';
  s1 = '#' || compress(s0, '@*^%') || '#';
  PUT s1=;
RUN;

/* String functions: TRANWRD (replace every occurrence of a
   substring). */
DATA _NULL_;
  FILE print;
  s1 = '北京市公安局昌平县分局(昌平县十三陵景区)';
  s2 = tranwrd(s1, '昌平县', '昌平区');
  PUT s1= s2=;
RUN;

/* String functions: TRANSLATE (character-by-character mapping).
   Encodes the digits of a phone number with a substitution table and
   decodes them again with the table reversed. */
DATA _NULL_;
  FILE print;
  from   = '0123456789';
  to     = '3109427856';
  phone  = '01062761018';
  sec    = translate(phone, to, from);    /* encode */
  phone2 = translate(sec, from, to);      /* decode */
  PUT phone= sec= phone2=;
RUN;

/* String functions: SUBSTR (three characters starting at position 5). */
DATA _NULL_;
  FILE print;
  s  = 'file005.txt';
  s1 = substr(s, 5, 3);
  PUT s= s1=;
RUN;

/* SCAN: pick the comma-separated words of a string one at a time.
   A fifth word is requested as well, although the string holds only
   four; it is not printed. */
DATA _NULL_;
  s = '张三,李四,王五,马六';
  name1 = scan(s, 1, ',');
  name2 = scan(s, 2, ',');
  name3 = scan(s, 3, ',');
  name4 = scan(s, 4, ',');
  name5 = scan(s, 5, ',');
  PUT name1= name2= name3= name4=;
RUN;
/* SCAN into an array: full-width commas are first normalised to
   ASCII commas, then words are extracted until a blank one is met. */
DATA _NULL_;
  ARRAY name(100) $ 10;
  s = '张三, 李四，王五，马六';

  s = tranwrd(s, '，', ',');

  /* the UNTIL test runs after each pass and stops at the first blank word */
  DO i = 1 TO 100 UNTIL (name(i) = ' ');
    name(i) = scan(s, i, ',');
  END;
  PUT name1= name2= name3=;
RUN;

/* String functions: SCAN and LEFT.
   The second word of 'James, Bond' starts with a blank; LEFT moves
   that blank to the end. The '#' markers make this visible. */
DATA _NULL_;
  LENGTH name2 $ 10;
  name  = scan('James, Bond', 2, ',');
  name2 = scan('James, Bond', 2, ',');
  s  = '#' || name || '#';
  /* LEFT left-aligns the content of the string */
  s2 = '#' || left(name2) || '#';
  PUT '#name#=' s;
  PUT '#name2#=' s2;
RUN;

/* String functions: SCAN and TRIM */
data _null_;
  s0 = '12   5678';
  /* No delimiter argument is given, so SCAN uses its default delimiters. */
  s1 = scan(s0, 2);
  s2 = '#' || s1 || '#';
  /* Note (from the original author): the result of SCAN is 200 characters long! */
  s3 = '#' || trim(s1) || '#';
  /* TRIM removes the trailing blanks */
  put s2=;
  put s3=;
run;

/* 字符串函数：catx */
data _null_;
  s1 = ' a bc  ';
  s2 = '123  ';
  s3 = '   xyz     ';
  s = catx(',', s1, s2, s3);
  put s1= s2= s3= s=;
run;

/* The PUT and INPUT functions */
/* Explicit conversion with INPUT (x1, x2) versus using the character
   variable directly in arithmetic (y1, y2), which relies on automatic
   character-to-numeric conversion. */
data _null_;
  s1 = '15.2';
  x1 = input(s1, 8.);
  y1 = s1 + 10;
  z1 = x1 + 10;

  /* A value with a dollar sign and commas is read with the COMMA informat. */
  s2 = '$123,456.78';
  x2 = input(s2, comma12.);
  y2 = s2 + 10;
  z2 = x2 + 10;
  put _all_;
run;
/* Build a file name from a number with PUT (Z3. zero-pads to width 3),
   then recover the number from the name with SUBSTR and INPUT. */
data _null_;
  x = 5;
  s = 'F' || put(x, Z3.) || '.txt';
  put s=;

  /* Characters 2-4 of s hold the zero-padded number. */
  s1 = substr(s,2,3);
  x2 = input(s1, 3.);
  put x2=;
run;



/* 分布密度数据集生成 */
data density;
  do x=-3 to 3 by 0.05;
    fx = PDF('normal', x, 0, 1);
    ftx5 = PDF('t', x, 5);
    ftx30 = PDF('t', x, 30);
    cdfx = CDF('normal', x, 0, 1);
    output;
  end;
run;

/* 双侧分位数 */
data _null_;
  file print;
  alpha = 0.05;
  xp = probit(1 - alpha/2);
  xp10 = tinv(1 - alpha/2, 10);
  put '正态双侧' alpha '分位数=' xp;
  put 't(10)双侧' alpha '分位数=' xp10;
run;

data _null_;
  do i=1 to 5;
    x = uniform(123);
    put x 6.4;
  end;
run;


/* Simulate data for a regression: y = a + b*x + eps */
data regsim;
    /* n observations, intercept a, slope b */
    n = 30;
    a = 2.0;
    b = 1.5;
    do i=1 to n;
        /* x is 10 plus 2 times a NORMAL() draw; eps is 0.5 times a NORMAL() draw.
           Each NORMAL call is given its own seed argument. */
        x = 10 + 2*normal(166337);
        eps = 0.5*normal(3344557);
        y = a + b*x + eps;
        /* One observation per loop iteration. */
        output;
    end;
run;

/* Discrete random numbers */
/* Draw 600 values with RANTBL from a table of six equal probabilities
   and print them 20 per line. */
data _null_;
  file print;
  p = 1/6;
  do i=1 to 600;
    y = rantbl(553311, p,p,p,p,p,p);
    /* The trailing @ holds the output line so values accumulate on it. */
    put y 2. @;
    /* After every 20th value, release the line. */
    if mod(i,20)=0 then put;
  end;
run;
/* Same idea with 6000 draws, additionally counting how often each
   value occurs in the counters c1-c6 (initialised to 0). */
data _null_;
  file print;
  p = 1/6;
  array c(6) (6*0);
  do i=1 to 6000;
     x = rantbl(12345, p,p,p,p,p,p);
     put x 2. @;
     /* The drawn value is used directly as the counter index. */
     c(x) = c(x) + 1;
     if mod(i,20)=0 then put;
  end;
  put c1 c2 c3 c4 c5 c6;
run;
/* As above, but the counts are printed as a small table:
   a rule, the values 1-6, a rule, then the six counts. */
data _null_;
  file print;
  p = 1/6;
  array c(6) (6*0);
  do i=1 to 6000;
     x = rantbl(12345, p,p,p,p,p,p);
     put x 2. @;
     c(x) = c(x) + 1;
     if mod(i,20)=0 then put;
  end;
  /* Horizontal rule built by repeating '-'. */
  s = repeat('-', 50);
  put s;
  do i = 1 to 6;
    put i 6. @;
  end;
  put;
  put s;
  do i = 1 to 6;
    put c(i) 6. @;
  end;
  put;
run;

/* SAS样本统计函数的例子。
*/
data salesd;
  array x(16);
  array sale(16);
  do j=1 to 16;
    x(j) = round(normal(0)*10+100, 10);
    xx = x(j);
    put j 2. xx 5.;
  end;
  do i=1 to 20;
    do j=1 to 16;
      sale(j) = round(x(j) + normal(0)*2);
    end;
    salesum=sum(OF sale1-sale16);
    salemin=min(OF sale1-sale16);
    salemax=max(OF sale1-sale16);
    output;
  end;
  keep i sale1-sale16 salesum salemin salemax;
run;
proc print; run;
proc means; run;

/* 模拟生成样本统计量的重复观测值 */
data ac;
    array x(20);
    do i=1 to 1000;
        do j=1 to 20;
            x(j) = sqrt(20)*normal(111);
        end;
        xm = mean(OF x1-x20);
        sk = skewness(OF x1-x20);
        output;
    end;
    drop i j;
run;

/* PROC IML example */
proc iml;
  /* Print the result of every assignment automatically. */
  reset print;

  /* Literals: a scalar, row vectors, an index range, column vectors,
     numeric matrices and a character matrix. */
  sc = 15.25;
  vh1 = {1 2};
  vh2 = {11 22};
  vh3=5:9;
  vv1 = {3, 4, 5};
  vv2 = {30, 40, 50};
  mat1 = {1 2 3,
          1 1 1};
  mat2 = { 1 2 -1,
          -1 0  1};
  mat3 = {"Li"  "Ming",
         "Zhang"  "Chong"};

  /* Arithmetic: + and - are elementwise, # is elementwise multiplication,
     * is matrix multiplication, and a scalar is added to every element. */
  res1 = vh1 + vh2;
  res2 = vv1 - vv2;
  res3 = mat1 # mat2;
  res4 = mat1 * vv1;
  res5 = mat1 + sc;

  /* Subscripting: single element, whole column, whole row, selected
     rows/columns, index ranges, and the + reduction operator
     (A[,+] sums across columns, A[+,] sums down rows). */
  A = {11 12 13 14,
       21 22 23 24,
       31 32 33 34};
  A1 = A[2,3];
  A2 = A[,3];
  A3 = A[2,];
  A4 = A[{1,3},{2,4}];
  A5 = A[2:3,2:4];
  sumr = A[,+];
  sumc = A[+,];

  /* Transpose (backtick), product, inverse, and a check that
     mat5 * INV(mat5) gives the identity. */
  mat4 = mat2`;
  mat5 = mat1 * mat4;
  mat6 = INV(mat5);
  mat7 = mat5 * mat6;

  /* Concatenation: || is horizontal, // is vertical. */
  comb1 = mat1 || mat2;
  comb2 = mat1 // mat2;

  /* Constructors: identity matrix, and J(nrow, ncol <, value>)
     for a constant matrix. */
  B1 = I(4);
  B2 = J(3,1);
  B3 = J(3,4);
  B4 = J(3,4, 100);
  
  /* DIAG of a square matrix and of a vector. */
  D1 = diag(A[,1:3]);
  D2 = diag(vv1);
  
  /* Generalized inverse, and solving the linear system AA*xx = bb. */
  R1 = ginv(A);
  AA = {1 2 3,
        1 3 2,
        2 1 3};
  bb = {2,3,1};
  xx = solve(AA, bb);
  
  /* Reading a SAS data set: READ ALL loads every variable into a
     vector of the same name. */
  USE  samp.class;
  READ ALL;
  ratio = weight/height;
  
  /* Read only the listed variables. */
  READ ALL VAR {height weight};
  ratio = weight/height;

  /* Read into one matrix; columns 2 and 3 are used as height and weight
     (presumably the numeric columns are age, height, weight, as the
     column labels further down suggest). */
  READ ALL INTO Mat;
  ratio = Mat[,3]/Mat[,2];

  /* Read selected observations by number. */
  READ POINT {1 3} INTO Mat;
  ind = 1:3;
  READ POINT ind INTO Mat;

  /* Read a subset of observations with a WHERE clause. */
  READ ALL VAR {height weight} 
    WHERE(sex='M');

  /* Print the matrix with row and column labels. */
  READ ALL INTO Mat;
  cn = {'age' 'height' 'weight'};
  READ ALL VAR {'name'} INTO name;
  PRINT Mat[rowname=name colname=cn];

  /* Write the vectors name and ratio out as data set newd. */
  CREATE newd VAR{name ratio};
  APPEND;
  CLOSE newd;
quit;

**************************************************;
**************************************************;
**************************************************;
**************************************************;
**************************************************;
**************************************************;

/**************************/
/* §2.3 SAS语言的数据管理  */
/**************************/

data a;
  put x= y= z=;
  input x y;
  z=x+y;
  put x= y= z=;
  cards;
10 20
100 200
;
run;

/* 用PUT和LIST语句调试数据步 */
data a;
  put x= y= z=;
  input x y;
  z=x+y;
  put _all_;
  list;
  cards;
10 20
100 200
;
run;

data c9501;
  input name $ sex $ math chinese;
  cards;
李明 男 92 98
张红艺 女 89 106
王思明 男 86 90
张聪 男 98 109
刘颍 女 80 110
;
run;


data c9501;
  input name $ 1-10 sex $ 
        math 14-16 chinese 18-20;
  cards;
李明      男  92  98
张红艺    女  89 106
王思明    男  86  90
张聪      男  98 109
刘颍      女  80 110
;
run;


/* Column input for densely packed data */
/* Each record is an 18-character ID; columns 7-10, 11-12 and 13-14 are
   read as year, month and day and combined into a SAS date with MDY. */
data pids;
  input year 7-10  mon 11-12  day 13-14;
  birth = mdy(mon, day, year);
  format birth yymmdd10.;
  cards;
110103197512092232
110101196902150059
;
run;
proc print;run;
    
/* 指定起始列 */
data;
  input @11 x y z;
  cards;
1 2  3   4 101 102 103
11 12      201    202 203
;
run;
proc print;run;

/* 读入日期 */
data;
  input date yymmdd8. sales;
  format date yymmdd10.;
  cards;
56-6-13    1100
67.12.15   1200
78 10 2    1300
891001     1400
19960101   1500
20020901   1600
;
run;
proc print;run;

data;
  input date yymmdd10. sales;
  format date yymmdd10.;
  cards;
56-6-13    1100
67.12.15   1200
78 10 2    1300
891001     1400
19960101   1500
20020901   1600
1956-6-13  1100
1967.12.15 1200
1978 10 2  1300
19891001   1400
19960101   1500
20020901   1600
;
run;
proc print;run;

/* INFORMAT语句 */
data;
  informat date yymmdd10.;
  input sales date;
  format date yymmdd10.;
  cards;
1100 56-6-13
1200 67.12.15
;
run;
proc print;run;

/* 冒号格式 */
data;
  input sales date : yymmdd10. ;
  format date yymmdd10.;
  datalines;
1100         56-6-13
1200 67.12.15
;
run;
proc print;run;

data _null_;
  input x 6.2 y;
  put x= y=;
  cards;
1.2    10
123456 20
3.4 30
;
run;

data;
  input num sales comma7. profit percent7.;
  datalines;
10 123,456 25.2%
15 245,889 9.8%
;
run;
proc print;run;

data _null_;
  input x : 6.2 y;
  put x= y=;
  cards;
    123456.78 10
12345678 20
3.4 30
;
run;

/* $CHARw. 输入格式 */
data;
  input a $ 1-5 b $char5. c;
  if b='' then put b=;
  cards;
abc     3s 10
abcd ab de20
abcde  .    30
;
run;
proc print;run;

data _null_;
  length s $ 3;
  input s :$6. t $;
  put s= t=;
  cards;
    ab xy
abcdefgh zw
;
run;

/* &修饰符，允许数据内有个单个空格 */
data aa;
  length name $20;
  input name & $ code $char4.;
  s = "#" || code || "#";
  method='& ';
  cards;
Li Ying  abcd
Zhang Xiaoming    efgh
   Xu Jun  limn
;
run;
proc print;run;

/* &和:修饰符, 用:找到code的开始位置 */
data ab;
  length name $20;
  input name & $ code : $char4.;
  s = "#" || code || "#";
  method='&:';
  cards;
Li Ying  abcd
Zhang Xiaoming    efgh
   Xu Jun  limn
;
run;
proc print;run;

data ac;
    set aa ab;
run;
proc print;run;

/* ~修饰符 */
data ad;
  infile datalines dsd;
  input Name : $9. Score1-Score3 Team ~ $25. Div $;
  datalines;
Smith,12,22,46,"Green Hornets, Atlanta",AAA
Mitchel,23,19,25,"High Volts, Portland",AAA
Jones,09,17,54,"Vulcans, Las Vegas",AA
;
proc print;run;

data ad;
  infile datalines dsd;
  length team $ 25;
  input Name : $9. Score1-Score3 Team $ Div $;
  datalines;
Smith,12,22,46,"Green Hornets, Atlanta",AAA
Mitchel,23,19,25,"High Volts, Portland",AAA
Jones,09,17,54,"Vulcans, Las Vegas",AA
;
proc print;run;

/* 带逗号的数字和百分数 */
data;
  input num sales comma7. profit percent7.;
  datalines;
10 123,456 25.2%
15 245,889 9.8%
;
run;
proc print;run;

/* 数值输入格式 */
data _null_;
  x1 = input('123456.78', 16.); put x1=;
  x2 = input('123456.78', 16.2); put x2=;
  x3 = input('12345678', 16.2); put x3=;
  x4 = input('15.2E3', 16.); put x4=;
  x5 = input('$123,456.78', COMMA16.); put x5=;
  x6 = input('$123,456.78', COMMA16.4); put x6=;
  x7 = input('$(12345678)', COMMA16.2); put x7=;
  x8 = input('15.8%', PERCENT16.); put x8=;
run;

/* 数值输出格式 */
data _null_;
  s1 = '#' || put(123456.78, BEST16.) || '#';
  put s1=;
  s2 = '#' || put(123456.78, BEST4.) || '#';
  put s2=;
  s3 = '#' || put(123456.78, 16.4) || '#';
  put s3=;
  s4 = '#' || put(-123456.78, COMMA16.4) || '#';
  put s4=;
  s5 = '#' || put(-123456.78, DOLLAR16.4) || '#';
  put s5=;
  s6 = '#' || put(-1.782, PERCENT10.2) || '#';
  put s6=;
  s7 = 'x' || put(1, Z3.); put s7=;
  s8 = put(1.23E-6, PVALUE10.); put s8=; 
run;

/* 字符型输入格式 */
data a;
  input s $5.;
  ss = '#' || s || '#';
  cards;
  cde
a cde
  .
run;
proc print;run;
data b;
  input s $CHAR5.;
  ss = '#' || s || '#';
  cards;
  cde
a cde
  .
run;
proc print;run;

/* 字符型输出格式 */
data _null_;
  s1 = '  a b ';
  r1 = '#' || put(s1, $8.) || '#'; 
  r2 = '#' || put(left(s1), $8.) || '#'; 
  r3 = put(s1, $QUOTE8.);
  put '#' s1 $6. '#';
  put r1= r2= r3=;
run;

/* 日期时间输入和输出 */
data _null_;
  d1 = input('12SEP1995', DATE9.);
  put d1 yymmdd10.;
  d2 = input('SEP1995', MONYY7.);
  put d2 yymmdd10.;
  t = input('15:28:25.22', TIME12.);
  put t time12.2;
  put t hhmm8.2;
  put t hhmm6.;
run;

/* 两变量之间的分配 */
data;
  input id $1-4 name $char5. sex $;
  s = "#" || name || "#";
  cards;
1001KittyF
1002 John M
1003  Mary F
;
proc print;run;

data;
  length id $ 8 name $ 5;
  input id $1-4 name $ sex $1.;
  s1 = "#" || name || "#";
  s2 = "#" || sex || "#";
  cards;
1001Kitty F
1002   John M
1003 Mary  F
;
proc print;run;


/* @@标志 */
data;
  input x y @@;
  datalines;
1 1  2 4  3 9
4 16 5 25 6 36
;
run;
proc print;run;

/* @标志 */
data;
  input s $ @ ;
  if s^='yes' then delete;
  else input x y;
  datalines;
no 1 1  
yes 2 4  
yes 3 9
;
run;
proc print;run;


/* 一个观测占用多行 */
data;
  input tmp1$ name $;
  input tmp2$ age;
  input;
  keep name age;
  datalines;
姓名: 李明
年龄: 39

姓名: 张聪
年龄: 12

;
run;
proc print;run;

/* One observation spread over three input lines, read with the
   absolute line pointers #1, #2 and #3.
   The INPUT statement must stay one statement: the final #3 tells SAS
   that each observation occupies three lines, so the blank separator
   line is consumed as well. (A semicolon after "age" would end the
   statement early and leave "#3;" as an invalid statement.) */
data;
  input #1 tmp1 $ name $
        #2 tmp2 $ age
        #3;
  /* tmp1/tmp2 only absorb the field labels and are not kept. */
  keep name age;
  datalines;
姓名: 李明
年龄: 39

姓名: 张聪
年龄: 12

;
run;
proc print;run;

data;
  input tmp1$ name $ /
        tmp2$ age /
        ;
  keep name age;
  datalines;
姓名: 李明
年龄: 39

姓名: 张聪
年龄: 12

;
run;
proc print;run;


/* 读过行尾的问题 */
data;
  input x y;
  datalines;
1 1  
2   
3 9
4 16
;
run;
proc print;run;

data;
  infile datalines truncover;
  input x y;
  datalines;
1 1  
2   
3 9
4 16
;
run;
proc print;run;

/* ATTRIB 语句 */
data sales;
  ATTRIB name LABEL="姓名" LENGTH=$10
         date LABEL="日期" FORMAT=yymmdd10. 
              INFORMAT=mmddyy10.
         amount LABEL="金额" FORMAT=10.2;
  input name $ 1-10 date amount;
  cards;
张鹏      10/15/1998 2000
李志明    1/3/99     1500
王敏      11/5/99    3000
;
run;
proc print noobs label;
run;

/* INFILE语句 */
data c9501;  
  infile 'd:\users\sas\stud.txt';  
  input name $ 1-10 sex $ math chinese;
run;
proc print;run;

/* INFILE语句与.csv文件 */
data;
  infile 'class.csv' dsd firstobs=2;
  input name $ sex $ age height weight;
run;
proc print;run;    

/* INFILE语句与制表符分隔文件 */
data;
  infile 'tabd.txt' delimiter='09'x firstobs=2;
  input x y;
run;
proc print;run;    
/* Tab-delimited data read from in-stream lines.
   Unlike the external file example, the in-stream data has no header
   line, so FIRSTOBS=2 must not be used here: it would silently drop
   the first observation. Only the first two fields of each line are
   read into x and y. */
data;
  infile datalines delimiter='09'x;
  input x y;
  datalines;
13	56.5	84
13	65.3	98
14	64.3	90
12	56.3	77
12	59.8	84.5
15	66.5	112
11	51.3	50.5
15	62.5	112.5
14	62.8	102.5
14	69	112.5
14	63.5	102.5
15	67	133
12	57.3	83
13	62.5	84
12	59	99.5
16	72	150
12	64.8	128
11	57.5	85
15	66.5	112
;
run;
proc print;run;    



data;
  missing x y z;
  input name $ age @@;
  if age=.X then
  put name 'have age of missing X type.';
  cards;
John 19 Mary x Denny 21
Kitty y Harte z
;
run;
proc print;run;


/* 8.0版本的IMPORT */
PROC IMPORT OUT= WORK.c9501a 
            DATAFILE= "D:\users\sas\c9501.xls" 
            DBMS=EXCEL2000 REPLACE;
     GETNAMES=YES;
RUN;

/* 9.1版本的IMPORT */
PROC IMPORT OUT= WORK.class2 
     DATAFILE= "E:\disk\course\statsoft\class.xls" 
     DBMS=EXCEL REPLACE;
     SHEET="CLASS"; 
     GETNAMES=YES; /* 第一行为变量名 */
     MIXED=NO;     /* 数值型与文本行混合列是否当作字符型列 */
     SCANTEXT=YES; /* 是否自动确定字符型变量长度 */
     USEDATE=YES;  /* 是否自动识别日期 */
     SCANTIME=YES; /* 是否自动识别时间 */
RUN;

/* Macro that imports every sheet of an Excel workbook in one go.
   Parameter:
     name - file name of the workbook inside the directory &dir.
   Each sheet is imported as SHEET1, SHEET2, ... and appended to the
   data set MASTER. */
%let dir=C:\demo;
%macro ReadXls (name);
  /* Keep the loop index and the sheet count out of the global scope. */
  %local i number;

  /* Open the workbook as a library so its sheets appear in the
     dictionary view SASHELP.VSTABVW. */
  libname excellib excel "&dir.\&name";
  proc sql noprint;
    create table sheetname as
      select tranwrd(memname, "''", "'" ) as sheetname
      from sashelp.vstabvw
      where libname= "EXCELLIB";
    /* Number of distinct sheets. */
    select count(DISTINCT sheetname) into :number
      from sheetname;
    /* One macro variable per sheet: sheet1 ... sheetN.
       %left strips the leading blanks that INTO leaves in &number. */
    select DISTINCT sheetname into :sheet1 - :sheet%left(&number)
      from sheetname;
  quit;
  /* Release the workbook before PROC IMPORT opens it. */
  libname excellib clear;

  /* Import each sheet and stack it onto MASTER. */
  %do i=1 %to &number;
    proc import datafile= "&dir.\&name"
      out=sheet&i replace;
      sheet= "&&sheet&i";
      getnames=yes;
      mixed=yes;
    run;
    proc append base=master data=sheet&i force;
    run;
  %end;
%mend ReadXls;
%ReadXls (test.xls)

    
/* LIBNAME 访问ORACLE数据库 */
libname mydblib oracle user=testuser 
    password=testpass path=hrdept_002;
proc print data=mydblib.employees; 
   where dept='CSR010';   
run;

/* LIBNAME access to a MySQL database through an ODBC data source.
   "sasmysql" is the ODBC data source name; tables are referenced
   through the libref assigned here (mydb), not through the DSN. */
LIBNAME mydb ODBC datasrc=sasmysql;
proc print data=mydb.c9501;
run;

/* LIBNAME 访问Excel文件 */
LIBNAME excl EXCEL 'class.xls';
proc datasets library=excl nolist;
  contents data=_all_ nods;
run;quit;
proc print data=excl.class;
run;

/* LIBNAME access to an Excel file through an ODBC connection.
** This approach lets SAS 9.1.3 read the Excel 2007 .xlsx format.
** First open "Administrative Tools" in the Windows Control Panel,
** choose "Data Sources (ODBC)" there and define a new data source xlsxSample.
** When selecting the driver, pick the Excel driver that covers .xlsx and use the Excel 12.0 format;
** click "Select Workbook" to choose an .xlsx file as the input source.
** The program below then treats the Excel file as a database whose sheets are data sets.
** Because sheet names contain a "$", a data set is referenced as myexcel."c9501$"n.
** The connection between SAS and ODBC is closed after use.
** To read a different Excel file,
** copy it over the file at the original location.
*/
LIBNAME myexcel ODBC datasrc=xlsxSample;
/* List the members (sheets) available through the libref. */
proc datasets library=myexcel nolist;
  contents data=_all_ nods;
run;quit;
/* The n-literal is needed because the member name contains "$". */
proc print data=myexcel."c9501$"n;
run;
/* NOTE(review): intended to release the libref; the explicit form is
   "libname myexcel clear;" - confirm this short form behaves the same
   on the SAS release in use. */
libname myexcel;




/* 用PROC ACCESS访问外部数据库 */
PROC ACCESS DBMS=SYBASE;
    CREATE samp.sales.ACCESS;
    SERVER='DBIN';
    DATABASE='Finance';
    TABLE='Sales';
    USER='guest';
    PASSWORD='anyone';
    CREATE samp.salesall.VIEW;
    SELECT ALL;
RUN;

/* SQL直连与ODBC数据源 */
PROC SQL;
    CONNECT TO ODBC AS myos 
      (DSN='sasmysql');
    CREATE VIEW sasuser.c9501b AS
        SELECT * FROM CONNECTION TO myos (
             SELECT name, math FROM c9501 );
    DISCONNECT FROM myos;
QUIT;


/* 用PROC CONTENTS查看数据集结构 */
proc contents data=samp.class;
run;
/* 用PROC DATASETS查看数据集结构 */
proc datasets library=sasuser nolist;
  contents data=c9501;
quit;

/* SET语句 */
data sales0;
  ATTRIB name LABEL="姓名" LENGTH=$10
         date LABEL="日期" FORMAT=yymmdd10.
              INFORMAT=mmddyy10.
         amount0 LABEL="金额" length=$10;
  input name $ 1-10 date amount0 $;
  cards;
张鹏      10/15/1998 2000
李志明    1/3/99     休假
王敏      11/5/99    3000
;
run;
/* Derive a numeric amount from the character field amount0.
   out flags rows whose field holds the on-leave marker instead of a
   number; for those rows amount is missing. */
data sales;
  set sales0;
  out = (amount0 = '休假');
  if out then amount = .;
  else amount = input(amount0, 16.);
run;
proc print noobs label;
run;

data samp.cls;
  set c9501;
run;

/* SET语句不预先把变量置为缺失值的演示 */
data c9501a;
  length name $ 10;
  put _n_= 'Before input: ' name=;
  input name $ 1-10 sex $ 11-12
        math 14-16 chinese 18-20;
  put _n_= 'After input:  ' name=;
  put;
  cards;
李明      男  92  98
张红艺    女  89 106
王思明    男  86  90
张聪      男  98 109
刘颍      女  80 110
;
run;
/* Counterpart of the INPUT demonstration above, reading with SET:
   the log shows the value of name before and after the SET statement
   in every iteration. Both PUT statements use name= so the two lines
   of each iteration can be compared directly. */
data c9501b;
  length name $ 10;
  put _n_= 'Before set: ' name=;
  set samp.c9501;
  put _n_= 'After set:  ' name=;
  /* Blank line between iterations. */
  put;
run;

proc datasets library=samp nolist;
  contents data=class;
quit;


/* 修改用SET语句读入的数据集 */
data c9501a;
  set c9501;
  if chinese>100 then chinese=100;
run;


/* 用KEEP语句取列子集 */
data c9501b;
  set c9501;
  keep name avg;
run;

/* 用DROP语句取列子集 */
data c9501b;
  set c9501;
  drop sex math chinese;
run;

/* 数据集输入选项 */
data c9501b;
  set c9501(keep=name avg);
run;

/* 数据集输出选项 */
data c9501b(keep=name avg);
  set samp.c9501;
run;

/* 数据集横向拆分 */
data a(keep=name sex) b(keep=name math chinese);
  set samp.c9501;
run;


/* 产生一个行、列数都较大的数据集 */
data huge;
  array x(10);
  do i=1 to 10000;
    do j=1 to 10;
      x(j) = normal(0);
    end;
    output;
  end;
  drop i j;
run;

/* 用数据集选项选行、列子集 */
data new;
  set huge(obs=100 keep=X1 X2);
run;

/* Simple random sample without replacement:
   attach a uniform random key to every row, sort by that key,
   and keep the first 100 rows. */
data new2;
  set huge(keep=X1 X2);
  sortid=uniform(111);
run;
/* The BY variable must be the key created above (sortid). */
proc sort data=new2; by sortid; run;
data new3;
  set new2(obs=100);
run;

/* 子集IF语句 */
data c9501c;
  set c9501;
  IF math>=90 and chinese>=100;
run;

/* 取行子集的WHERE语句 */
data c9501c;
  set c9501;
  WHERE math>=90 and chinese>=100;
run;

/* 取行子集的WHERE选项 */
data c9501c;
  set c9501(WHERE=
    (math>=90 and chinese>=100));
run;


/* 用WHERE选项纵向拆分数据集 */
data c9501m(where=(sex='男'))
     c9501f(where=(sex='女'));
  set samp.c9501;
run;
proc print data=c9501m;run;
proc print data=c9501f;run;

/* 用SET和OUTPUT纵向拆分数据集 */
data c9501m c9501f;
  set c9501;
  select(sex);
    when('男') output c9501m;
    when('女') output c9501f;
    otherwise put sex= '有错';
  end;
  drop sex;
run;
proc print data=c9501m;run;
proc print data=c9501f;run;

/* 自定义循环内部需要用OUTPUT语句输出观测 */
data sq;
  do i=1 to 10;
    j=i*i;
    output;
  end;
run;
proc print;run;

/* Read a CSV file with INFILE. Some fields hold comma-separated vectors
   protected by double quotes, one of them a vector of combined
   date-and-time values.
   The raw data is in the sampled subdirectory.
 */
/* Input file, output file, and the datetime origin that dt is measured from. */
%LET infile='multiseries.csv';
%LET outfile='table.csv';
%LET DateOrigin='01OCT2011:00:00:00'dt;

data k;
  /* Declare the numeric output variables first, then the long character
     buffers that receive the quoted vector fields. */
  length iobs dt date time y1-y7 8;
  length dts $ 9999 y1s  y3s y4s $ 9999;
  /* DSD handles the quoted, comma-containing fields; FIRSTOBS=2 skips
     the first line of the file. */
  infile &infile missover
         dsd lrecl=64000 pad firstobs=2;
  input dts $ y1s $ y2 y3s $ y4s $ y5 y6 y7;
  /* iobs records which input record each output row came from. */
  iobs = _n_;
  j = 0;
  /* Walk through the vector fields element by element and write one
     output row per element. */
  do while (1);
    j + 1;
    s = scan(dts, j, ',');
    /* No j-th timestamp left: this record is finished. */
    if s = ' ' then leave;

    /* Split the timestamp at the blank into date and time parts. */
    s1 = scan(s, 1, ' ');
    s2 = scan(s, 2, ' ');
    date = input(s1, yymmdd10.);
    time = input(s2, time8.);
    /* Combine into a datetime, then express it relative to &DateOrigin. */
    dt = dhms(date, hour(time), minute(time), second(time));
    dt = dt - &DateOrigin;

    /* j-th element of each vector field, converted to numeric. */
    s = scan(y1s, j, ',');
    y1 = input(s, 16.);

    s = scan(y3s, j, ',');
    y3 = input(s, 16.);

    s = scan(y4s, j, ',');
    y4 = input(s, 16.);
    /* y2 and y5-y7 are plain fields and keep the record's value on every row. */
    output;
    end;
  format date yymmdd10.;
  format time time8.;
  *format dt datetime19.;
  keep iobs dt date time y1-y7;
run;

PROC EXPORT DATA=WORK.K 
     OUTFILE= &outfile 
     DBMS=CSV REPLACE;
RUN;


/* SET语句的POINT=选项 */
data new;
  do i=1 to 19 by 2;
    set samp.class point=i;
    output;
  end;
  stop;
run;
proc print;run;

/* The END= option of the SET statement */
/* Source data: five values of x. */
data one;
  input x @@; cards;
1 2 3 4 5
;
/* Compute the mean of x: accumulate with a sum statement and, on the
   last observation (flagged by lastline), divide by the observation
   count and write the single result row. */
data two;
  set one end=lastline;
  avg + x;
  if lastline then do;
     avg = avg / _n_;
     output;
     end;
   keep avg;
run;
proc print;run;
/* Attach the one-row summary to every row of the detail data:
   the second SET executes only once, and variables read by SET are
   retained, so avg stays available on all later rows. */
data three;
  set one;
  if _n_=1 then set two;
run;
proc print;run;

data ab;
  length id 8 drug $1 effect 8;
  set aa;
  effect=a; drug='A'; output;
  effect=b; drug='B'; output;
  keep id drug effect;
run;
proc print;run;
data count;
  set samp.gpa END=lastline;
  IF lastline THEN do;
     n=_n_;
     OUTPUT;
     end;
  keep n;
run;
proc print;run;

/* RETAIN语句 */
data aa;
  retain prod 1;
  input x @@;
  prod = prod * x;
  cards;
1 2 3 4 5
;
run;
proc print;run;

data ac;
  retain x1 0;
  input x @@;
  d = x - x1;
  x1 = x;
  cards;
1 2 5 9 16
;
run;
proc print;run;

/* RETAIN statement, keeping only the final row.
   The EOF= option of INFILE names a label;
   when the end of the input is reached, execution jumps to that label.
   For a regular external file, END= could be used instead to set an
   indicator variable.
 */
data ad;
  /* prod carries the running product across iterations, starting at 1. */
  retain prod 1;
  infile datalines eof=lastline;
  input x @@;
  prod = prod * x;
  /* Discard every regular iteration; only the EOF branch writes a row. */
  delete;
  keep prod;
  /* Reached via EOF=: write the final product once. */
  lastline:
    output;
    return;
  cards;
1 2 3 4 5
;
run;
proc print;run;

/* SET语句应该不把变量置为缺失，此例是一个反例：
   SET只保证用SET语句读入的变量在下一轮隐含循环
   不被置为缺失，但是用赋值定义的变量在下一轮隐含
   循环中仍置为缺失。
 */
data aa;
  input x @@;
  cards;
1 2 3 4 5
;
run;
data ab;
  put _n_ 2. '  1:' prod= x=;
  if _n_=1 then prod = 1;
  put _n_ 2. '  2:' prod= x=;
  set aa;
  put _n_ 2. '  3:' prod= x=;
  prod = prod * x;
  put _n_ 2. '  4:' prod= x=;
run;
proc print;run;

/* 纵向合并 */    
data class1;
  input id a;
  cards;
1 11
2 12
;
data class2;
  input a id;
  cards;
21 3
22 4
;
data class3;
  input id b;
  cards;
5 91
6 92
;
run;

data classes;
  set class1 class2
      class3;
run;

/* 纵向合并，包含来源信息 */
data new;
  set c9501m(in=male) c9501f(in=female);
  if male=1 then sex='男';
  if female=1 then sex='女';
run;

/* Stack the male and female subsets and rebuild the sex column from
   the IN= indicators. The LENGTH statement fixes the column order and
   makes sex wide enough for 'Female'. */
data new;
  length name $ 10 sex $ 8 math 8 chinese 8 avg 8;
  set samp.c9501m(IN=m)
    samp.c9501f(IN=f);
  IF m THEN sex='Male';
  IF f THEN sex='Female';
run;
/* Print the result the same way the other examples do, rather than
   through a macro call whose definition is not part of this section. */
proc print;run;

/* 直接按行号进行横向合并 */    
data c9501u(keep=name sex)
     c9501v(keep=math)
     c9501w(keep=chinese);
  set samp.c9501;
run;
data new;
  merge c9501u c9501v c9501w;
run;


/* 横向合并例子，一对一 */    
data c9501x;
  set samp.c9501;
  keep name sex;
run;
data c9501y;
  set samp.c9501;
  keep name math chinese;
run;

data c9501x(keep=name sex)
     c9501y(keep=name math chinese);
    set samp.c9501;
run;
proc sort data=c9501x;
  by name;
run;
proc sort data=c9501y;
  by name;
run;
data new;
  merge c9501x c9501y;
  by name;
run;
proc print;run;

/* Handling unmatched rows: keep the unmatched rows as well */
data d1;
  input id $ x;
  cards;
a 1
b 2
;
data d2;
  input id $ y;
  cards;
a 21
c 23
;
/* A BY-merge needs BOTH inputs sorted by the key, so sort d1 and d2. */
proc sort data=d1;
  by id;
run;
proc sort data=d2;
  by id;
run;
/* Every id found in either input yields a row; variables from the
   input that has no such id are missing. */
data new;
  merge d1 d2;
  by id;
run;
proc print;run;

/* 不匹配的行的处理: 仅包含匹配的行 */
data new;
  merge d1(IN=ina) d2(IN=inb);
  by id;
  IF ina and inb;
proc print;run;
    
/* 不匹配的行的处理: 仅包含出现于A的行 */
data new;
  merge d1(IN=ina) d2;
  by id;
  IF ina;
proc print;run;
    

/* 一对多横向合并例子 */
data d1;
   input id $ x;
   cards;
a 11
b 12
;
run;
data d2;
    input id $ y;
    cards;
a 21
a 22
;
run;
data new;
  merge d1 d2;
  by id;
run;
proc print;run;
    
/* 另一个一对多合并例子 */
data aa;
  set samp.c9501;
  good = math>=85 AND chinese>=100;
run;
proc print;run;
data te;
  input good teacher $;
  cards;
1  AAA
0  BBB
;
run;
proc sort data=aa;
  by good;
proc sort data=te;
  by good;
data ab;
  merge aa te;
  by good;
run;
proc print;run;

/* 多对多的横向合并例子 */
data d3;
  input id $ x;
  cards;
a 11
b 12
a 13
;
run;
data d4;
  input id $ y;
  cards;
a 21
a 22
b 23
a 24
;
run;
proc sort data=d3; by id;
proc sort data=d4; by id;
data new;
  merge d3 d4;
  by id;
run;
proc print;run;

/* 用UPDATE语句更新数据集 */    
data upd;
  input name $ sex $ chinese;
  cards;
张红艺 男 .
王思明 . 91
;
run;

proc sort data=c9501;
  by name;
run;
proc sort data=upd;
  by name;
run;

data new;
  update c9501 upd;
  by name;
run;
proc print;run;

/* When updating, recompute derived columns only where needed.
   The BY statement directly follows the UPDATE statement it belongs to;
   in_upd is 1 for rows that received a transaction from upd, and only
   for those rows is avg recomputed from the updated scores. */
data new;
  update c9501 upd(in=in_upd);
  by name;
  if in_upd=1 then
     avg = math*0.5 + chinese/120*100*0.5;
run;
proc print;run;


**************************************************;
**************************************************;
**************************************************;
**************************************************;
**************************************************;

/***************************/
/* §2.4 宏                 */
/***************************/
/* A typical macro example:
   read df1.txt ... df10.txt into data sets df1 ... df10, then stack
   them into WHOLE with a SET statement assembled by the macro. */
%MACRO SALREAD;
  %LET setstm=SET;
  %DO NP=1 %TO 10;
    /* File name (quoted) and data set name for person &NP. */
    %let ff="df&NP..txt";
    %let fd=df&NP;
    data &fd;
      infile &ff;
      input date yymmdd10. sales;
      persid=&NP;
    run;
    /* Grow the SET statement by this data set. */
    %LET setstm=&setstm df&NP;
  %END;
  /* Show the generated statement in the log before using it. */
  %PUT &setstm;
  data whole;
    &setstm;
  run;
%MEND SALREAD;

%SALREAD;

/******************************************/
/*           宏变量例子                    */
/******************************************/

/* 自动宏变量例子 */    
proc print data=samp.class noobs label;
    title 'Listing of the CLASS dataset';
    footnote1 "Created &systime. &sysday, &sysdate9.";
    footnote2 "on the &sysscp. System using Release &sysver.";
run;

/* 自定义宏变量例子 */    
%let var1= SAS Macro; 
%let var2 = "SAS Macro"; 
%let var3 = "SAS' Macro"; 
%let var4 = 3+4;
%PUT &var1. &var2. &var3.;
%PUT The value of var4 is &var4.;

/* 宏PUT的特殊用法 */
%put _all_;
%put ERROR: unexpected result.;
%put WARNING: maybe wrong.;
%put NOTE: some bad data?;

/* 宏变量定义中使用宏替换的例子 */    
/* 假设有数据文件file11.txt，其中包含日期和销售额数据 */    
%LET i=11;
%LET fname=file&i;
%LET ff="&fname..txt";
%PUT &ff.;

data &fname.;
  infile &ff.;
  input date yymmdd10. sales;
  persid=&i;
run;

/* 这是前面的宏展开后的效果 */    
data file11;
  infile "file11.txt";
  input date yymmdd10. sales;
  persid=11;
run;

/* 多个&引发两次扫描的例子 */
%LET varpre=sale;
%LET k=95;
%LET sale95=15.3;
%PUT &sale95;

%PUT &varpre&k;

%PUT &&sale&k;

%PUT &&varpre&k;

%PUT &&&varpre&k;


/* 自动宏变量例子 */    
%PUT &sysdate &sysdate9 &sysday;
%PUT &syssite &sysscp &sysscpl &sysuserid;

/* 用宏变量避免重复的例子 */    
%LET dsn=samp.class;

title "Dataset &dsn";
proc contents data=&dsn.;
run;
proc print data=&dsn.(obs=10);
run;
title;


/******************************************/
/*               宏例子                    */
/******************************************/

/* 最简单的宏：只是替换一个语句中的一段 */
/* 原意 */
data new;
  set c9501f c9501m;
run;
/* 使用宏替换语句片段 */
%macro ds;
  c9501f c9501m
%mend ds;
data new2;
  set %ds;
run;
/* 使用宏替换片段, 包括分号在内 */
%macro dsc;
  c9501f c9501m;
%mend dsc;
data new3;
  set %dsc
run;
/* 使用宏替换一段, 宏定义中调用宏变量，宏变量可以在调用前才赋值 */
%macro dsv;
  &d1 &d2
%mend dsv;
%let d1=c9501f;
%let d2=c9501m;
data new4;
  set %dsv;
run;
/* 使用宏替换一段，带有宏参数 */
%macro abc(d1,d2);
  &d1 &d2
%mend abc;
data new5;
  set %abc(c9501f,c9501m);
run;

/* 简单的宏，无参数, 包含一段SAS程序 */
%MACRO dsinfo;
title "Dataset &dsn";
proc contents data=&dsn.;
run;
proc print data=&dsn.(obs=10);
run;
title;
%MEND dsinfo;

%LET dsn=samp.class;
%dsinfo

/* 带有参数的宏 */    
%MACRO dsinfov(dsn);
title "Dataset &dsn";
proc contents data=&dsn.;
run;
proc print data=&dsn.(obs=10);
run;
title;
%MEND dsinfov;

%dsinfov(samp.class)

/* Macro with parameters: match-merge two data sets.
   d1, d2 - input data sets (both are sorted in place by the key)
   dout   - output data set
   byvar  - key variable(s) for the merge */
%macro merge2(d1,d2,dout, byvar);
   /* Order both inputs by the key first. */
   proc sort data=&d1.; by &byvar.; run;
   proc sort data=&d2.; by &byvar.; run;
   /* Then combine them row by row within each key value. */
   data &dout.;
     merge &d1. &d2.;
     by &byvar.;
   run;
%mend merge2;
%merge2(c9501x, c9501y, new6, name)


/* 带有缺省参数的宏 */    
%MACRO dsinfod(dsn=&syslast, nobs=10);
title "Dataset &dsn";
proc contents data=&dsn.;
run;
proc print data=&dsn.(obs=&nobs.);
run;
title;
%MEND dsinfod;

%dsinfod(dsn=samp.class, nobs=5)

data; x=1;y=2; run;
%dsinfod()

%dsinfod(nobs=5, dsn=samp.class)


/* 用宏屏蔽一段程序 */    
data; x=1;y=2; run;
%MACRO debug1;
  proc contents data=&syslast;
  run;
  proc print data=&syslast(obs=5);
  run;
%MEND debug1;
%*debug1;

%LET DEBUG=*;
%MACRO debug1;
  proc contents data=&syslast;
  run;
  proc print data=&syslast(obs=5);
  run;
%MEND debug1;
&DEBUG %debug1;


/******************************************/
/*       宏的流程控制语句例子               */
/******************************************/

/* 宏IF选择程序片段 */
data aa; input x y; cards;
1 2
11 12
;
run;
/* Macro %IF choosing a fragment inside one statement.
   dsn  - data set to print (defaults to the most recently created one)
   neat - yes: add LABEL NOOBS to PROC PRINT; anything else: add DOUBLE */
%MACRO myprog1(dsn=&syslast., neat=yes);
    proc print data=&dsn.(obs=10)
      /* The semicolons after "noobs" and "double" end the %IF/%ELSE
         macro statements; they are not part of the generated text. */
      %IF &neat=yes %THEN label noobs;
      %ELSE double;
    /* This lone semicolon is what ends the PROC PRINT statement. */
    ;
    run;
%MEND myprog1;
options mprint;
%myprog1(dsn=aa, neat=yes)
%myprog1(dsn=aa, neat=no)

/* 宏IF与复合语句 */
%MACRO myprog1b(dsn=&syslast., neat=yes);
  %IF &neat=yes %THEN %DO;
    proc print data=&dsn.(obs=10) label noobs;
    %END;
  %ELSE %DO;
    proc print data=&dsn.(obs=10) double;
    %END;
    run;
%MEND myprog1b;
%myprog1b(dsn=aa, neat=yes)
%myprog1b(dsn=aa, neat=no)

    
/* Macro %IF choosing a whole program section.
   dsn   - data set to analyse (defaults to the most recently created one)
   debug - any non-empty value additionally prints the first 10 rows */
%MACRO myprog2(dsn=&syslast., debug=);
  /* "&debug NE" with nothing after NE tests for a non-empty value. */
  %IF &debug NE %THEN %DO;
    proc print data=&dsn.(obs=10);
    run;
  %END;
  /* Always run: summary statistics for x and y. */
  proc means data=&dsn;
    var x y;
  run;
%MEND myprog2;
options mprint;
%myprog2(dsn=aa, debug=true)
%myprog2(dsn=aa)

    
/* The simplest macro loop: write 1 ... 10 to the log. */
%MACRO mac;
  /* Keep the loop index local; without this, a global macro variable
     named i (one is defined earlier in this file) would be overwritten. */
  %LOCAL i;
  %DO i=1 %TO 10;
    %PUT &i.;
  %END;
%MEND mac;

%mac


/* Example of reading several data files with a macro loop.
   This part generates the files.
   nfiles - number of files to write; file i is test\df<i>.txt and
            holds x = (i-1)*10+1 ... i*10 together with y = x*x. */
%MACRO gen(nfiles);
  /* Keep the loop index local so a global macro variable i is not
     overwritten by the loop. */
  %LOCAL i;
  %DO i=1 %TO &nfiles.;
    data _null_;
      file "test\df&i..txt";
      x1 = (&i. - 1)*10 + 1;
      x2 = &i. * 10;
      do x=x1 to x2;
        y = x*x;
        put x 5. y 8.;
      end;
    run;
  %END;
%MEND gen;

%gen(5);

/* Read test\df1.txt ... test\df<nfiles>.txt into d1 ... d<nfiles>
   and stack them into data set dd.
   nfiles - number of files to read. */
%MACRO rd(nfiles);
  /* Keep the working variables local so global macro variables named
     i or s are not overwritten. */
  %LOCAL i s;
  %DO i=1 %TO &nfiles.;
     data d&i.;
       infile "test\df&i..txt";
       input x y;
     run;
  %END;

  /* Assemble "SET d1 d2 ... dN". */
  %LET s=SET;
  %DO i=1 %TO &nfiles.;
     %LET s=&s. d&i.;
  %END;
  /* Show the generated statement in the log. */
  %PUT &s.;

  data dd;
    &s.;
  run;
%MEND rd;

%rd(5);

/* Another way to stack the data sets: generate the data set list
   inside the SET statement itself.
   nfiles - number of data sets d1 ... d<nfiles> to stack into dd. */
%MACRO solb(nfiles);
  /* Keep the loop index local so a global macro variable i is not
     overwritten. */
  %LOCAL i;
  data dd;
    set
      %DO i=1 %TO &nfiles.;
        d&i.
      %END;
    /* This semicolon ends the generated SET statement. */
    ;
  run;
%MEND solb;


/* 宏运行相关系统选项 */
/* MPRINT选项要求显示宏运行产生的SAS程序片段 */
options mprint;
data; x=11; y=12; run;
%MACRO debug1;
  proc print data=&syslast;
  run;
%MEND debug1;
%debug1


/******************************************/
/*           宏引文例子                    */
/******************************************/

/* 用%STR()包含分号、不配对撇号、括号等 */
%LET myvar=%STR(a%'); %PUT &myvar;
%LET myvar=%STR(b%"); %PUT &myvar;
%LET myvar=%STR(log%(12); %PUT &myvar;
%LET myvar=%STR(345%)); %PUT &myvar;

%LET myvar=%STR(90%%); %PUT &myvar;
%LET myvar=%STR(90%%%'); %PUT &myvar;

%LET printit=%STR(proc print; run;);

/* %STR()中仍可替换宏变量和宏 */
%LET var1=SAS Macro;
%LET var2=%STR(New &var1);
%PUT &var2;

/* %NRSTR()中禁止替换宏变量和宏 */
%LET var1=SAS Macro;
%LET var2=%NRSTR(John%'s &var1);
%PUT &var2;
%PUT This is the result of %NRSTR(%NRSTR);
/*'*/

/* %NRSTR()中禁止替换宏变量和宏，保护宏自变量缺省值 */
%MACRO credits(d=%NRSTR(Mary&Stacy&Joan Ltd.));
  footnote "Designed by &d";
%MEND credits;

%credits()

/* %BQUOTE保护充分替换后的结果而不是本身 */
%LET var1='abc';
%LET var2=%BQUOTE(%SUBSTR(&var1,1,3));
%PUT &var2;

data _null_;
  call symputx('var', 'B&G Coorporated');
run;
%LET com=%NRBQUOTE(&var);
%PUT &com;

/* %UNQUOTE解除保护 */
%LET city=Beijing;
%LET oth=%nrstr(&city);
%LET unq=%unquote(&oth);
%put oth: &oth;
%put unq: &unq;

/******************************************/
/*           宏函数例子                    */
/******************************************/

/* 字符型宏函数。%LENGTH求长度。注意提示文本不需要撇号包围。 */    
%LET var1=Macro;
%LET VL=%LENGTH(&var1.);
%PUT &var1. 长度为 &vl.;
%LET var2=;
%LET VL2=%LENGTH(&var2.);
%PUT 宏变量var2长度为 &vl2.;

/* %SUBSTR函数*/
%LET ss=A brown fox;
%LET s1=%SUBSTR(&ss, 3, 5);
%PUT &s1;

/* %INDEX函数*/
%LET ss=A brown fox;
%LET ii=%INDEX(%BQUOTE(&ss), ow);
%PUT ow 在 &ss.的第&ii.位置出现;

/* %SCAN without an explicit delimiter list: the default delimiters
   apply. %BQUOTE masks the comma in the value so that it is not taken
   as an argument separator. Item 99 does not exist, so xe is empty. */
%LET var1=NO.1 student, excellent!;
%LET x1=%SCAN(%BQUOTE(&var1),1);
%LET x3=%SCAN(%BQUOTE(&var1),3);
%LET x4=%SCAN(%BQUOTE(&var1),4);
%LET xe=%SCAN(%BQUOTE(&var1),99);
%PUT &x1  &x3  &x4  &xe;
/* %SCAN with %STR( ) as the only delimiter (a blank) and %BQUOTE
   protecting the argument value */
%LET var1=NO.1 student, excellent!;
%LET x = %SCAN(%BQUOTE(&var1), 2, %STR( ));
%PUT &var1.中第二项为&x.;

/* Walk through a blank-delimited list with %QSCAN and print every item.
 *
 * Parameter:
 *   source - the list to split; the caller should mask commas and other
 *            special characters (for example with %BQUOTE).
 *
 * The loop ends when %QSCAN returns an empty item. %QSCAN is used instead
 * of %SCAN so that each item stays masked when it is compared in %DO %WHILE.
 */
%MACRO exscan(source);
  /* Keep the working variables local; without this the macro overwrites
     any global macro variables named i or x. */
  %LOCAL i x;
  %LET i=1;
  %LET x=%QSCAN(%BQUOTE(&source), &i, %STR( ));
  %DO %WHILE( &x ^= %STR());
    %PUT Item.&i  &x;
    %LET i=%EVAL(&i+1);
    %LET x = %QSCAN(%BQUOTE(&source), &i, %STR( ));
  %END;
%MEND exscan;
%LET var1=NO.1 student, excellent!;
%exscan(%BQUOTE(&var1))


/* Conditions of %IF and of macro loops are evaluated automatically as
   integer expressions. x holds the text 1+2; the %IF condition evaluates
   that text before comparing it with 3. */
%macro impcomp;
  %let x = 1+2;
  %IF &x=3 %THEN %put 自动计算表达式.;
  %ELSE %put 没有计算;
%mend impcomp;
%impcomp

/* The automatic evaluation in %IF and macro loop conditions handles
   integers only. This macro is a deliberate counter-example: the operands
   contain decimal points, so the comparison is not done numerically
   (see prbc for the corrected form). */
%macro prb;
  %IF 10.0>9.0 %THEN %put 程序正确;
  %ELSE %put 不能进行非整数比较！;
%mend prb;
%prb

/* Corrected version of prb: %SYSEVALF performs floating-point
   evaluation, so the comparison of 10.0 and 9.0 is numeric. */
%macro prbc;
  %IF %sysevalf(10.0>9.0) %THEN %put 程序正确;
  %ELSE %put 不能进行非整数比较！;
%mend prbc;
%prbc

/* Numeric expressions. %EVAL does integer arithmetic only; %SYSEVALF
   handles non-integer values. Without either function %LET stores the
   expression as plain text (y2 holds the text 2+1). */
%LET y1=2;
%LET y2 = 2+1;
%LET y3=%EVAL(&y2 + 1);
%LET y4=%SYSEVALF(4.8/2);
%PUT &y1 &y2 &y3 &y4;

/* Call a DATA step function through %SYSFUNC; the optional second
   argument (6.2) is the format applied to the result. */
%LET x=3.14159/2;
%LET y=%SYSFUNC(sin(&x), 6.2);
%PUT &y;

/* %SYSFUNC can call PUTN, but not PUT */
%LET s=%SYSFUNC(putn(1,Z3.));
%PUT &s;

/* Convert the session date (&sysdate9) to yymmdd10. form with
   %SYSFUNC and PUTN, first into a macro variable ... */
%LET s=%SYSFUNC(putn("&sysdate9"d, yymmdd10.));
%PUT &sysdate9 &s;
/* ... and then directly inside a double-quoted footnote string */
footnote "演示结果：%SYSFUNC(putn("&sysdate9"d, yymmdd10.))";

/*************************************/
/*      自定义宏函数                  */
/*************************************/
/* Return a result through a global macro variable.
 *
 * dsexist(dsn) sets the global macro variable EXIST to 1 when the data
 * set named by dsn can be opened by a DATA step, and to 0 otherwise.
 *
 * Parameter:
 *   dsn - data set name (libref.member or member); may be empty.
 *
 * The probe step executes STOP before SET, so no observation is read.
 * A missing data set makes the step fail, which is reported in &syserr
 * (expect an ERROR message in the log in that case).
 */
%macro dsexist(dsn);
  %global exist;
  /* Default to "does not exist". In particular an empty dsn must not be
     judged by the &syserr left behind by some earlier, unrelated step. */
  %let exist=0;
  %if %length(&dsn) > 0 %then %do;
    data _null_;
      stop;
      set &dsn;
    run;
    /* Only the return code of the probe step above is meaningful */
    %if &syserr=0 %then %let exist=1;
  %end;
%mend dsexist;

/* Use the result: &exist resolves to the flag set by dsexist, so the
   assignment runs only when the flag is nonzero. */
%dsexist(sasuser.mydat)
data new;
  if &exist then dsname='sasuser.mydat';
run;

/* Function-style macro exist: expands to the value of the EXIST function
   for dsn (called through %SYSFUNC). The macro generates only that value
   and no statements, so it can be used inside expressions. */
%macro exist(dsn);
  %sysfunc(exist(&dsn))
%mend exist;

/* Report in the log whether dsn exists, using %exist as the %IF condition */
%macro test(dsn);
  %if %exist(&dsn) %then 
     %put &dsn.数据集存在;
  %else %put &dsn.数据集不存在;
%mend test;
%test(samp.class)
%test(samp.notexist)

/* Function-style macro currdate: expands to the current date formatted
   with yymmdd10. %QSYSFUNC returns the value masked. */
%macro currdate;
  %qsysfunc(date(), yymmdd10.)
%mend currdate;

/* The macro call is resolved inside the double-quoted footnote text */
title 'C9501数据集列表';
footnote "演示日期: %currdate";
proc print data=samp.c9501;run;


/*************************************/
/*      宏与数据步的信息交换           */
/*************************************/

/* CALL SYMPUTX example: pass the number of observations and a running
   total from a DATA step to macro variables, then use them in a footnote. */
%MACRO spe1;
data books;
  set samp.c9501bk end=lastobs;
  /* sum statement: ta accumulates amount across observations */
  ta + amount;
  /* on the last observation _n_ is the row count and ta the grand total */
  if lastobs then do;
    call symputx('nobs', _n_);
    call symputx('total', ta);
  end;
  drop ta;
run;
/* The macro variables are referenced only after the RUN statement,
   that is, after the DATA step has executed. */
footnote "&nobs.个人共花费&total.元";
proc print;run;
/* clear the footnote */
footnote;
%MEND spe1;

%spe1

/* CALL SYMPUTX example, input data: city code (cid) and city name (city) */
data aa;
  input cid city $;
  cards;
110 北京
230 上海
;
run;

/* Create one macro variable per row of aa: the name is the text city
   followed by the code (cid written with BEST3.), the value is the city
   name. With the data above this creates city110 and city230.
   NOTE(review): no symbol table is named in CALL SYMPUTX; the SYMGET
   example in macro spe3 relies on these variables still being available
   after this macro ends - confirm the scope is as intended. */
%MACRO spe2;
data _null_;
  set aa;
  call symputx('city' || put(cid,best3.), 
         city);
run;
%put &city110 &city230;
%MEND spe2;

%spe2

/* Static use of a macro variable in a DATA step: &nmax is replaced by
   its text before the step is compiled, so it can serve as an array
   dimension and as a loop bound. */
%LET nmax=5;
data aa;
  array x(&nmax);
  do i = 1 to 20;
    do j = 1 to &nmax;
      x(j) = normal(112233);
    end;
    output;
  end;
  drop i j;
run;

/* Static use of a macro variable: plain assignment */
%LET nobs=5;
data ab;
  n = &nobs;
  do i = 1 to n;
    j = i*i;
    output;
  end;
run;

/* Static use of a macro variable: initial value in a RETAIN statement */
%LET nobs=5;
data ac;
  retain n &nobs;
  do i = 1 to n;
    j = i*i;
    output;
  end;
run;


/* SYMGET example, input data: city code (cid) and a value (temp) */
data bb;
  input cid temp;
  cards;
110 20
230 30
;

/* Look up, for every row, the macro variable whose name is the text
   city followed by the code (cid written with BEST3.), and store its
   value in the DATA step variable city. SYMGET resolves the name while
   the step executes, so the name can differ from row to row.
   The macro variables city110 and city230 must already exist (they are
   created by the spe2 example). */
%MACRO spe3;
data bc;
  set bb;
  city = symget('city' || put(cid, best3.));
run;
proc print;run;
%MEND spe3;

%spe3

/* Drive a macro from a control data set: one row holding the data set
   name (dsname) and the number of observations to print (nobs). */
data cntr; 
  length dsname $ 20;
  input dsname $ nobs; 
  cards;
samp.class 5
;
run;
/* Copy the control values into the macro variables dsn and nobs, then
   run PROC CONTENTS and PROC PRINT (limited to nobs rows) on that data set. */
%MACRO controled;
  data _null_;
    set cntr;
    call symputx('dsn', dsname);
    call symputx('nobs', nobs);
  run;
  title "Dataset &dsn";
  proc contents data=&dsn.;
  run;
  proc print data=&dsn.(obs=&nobs.);
  run;
  /* clear the title */
  title;
%MEND controled;
%controled

/* Drive several runs from a control data set with more than one row:
   each row names a data set and the number of observations to print. */
data cntr;
  length dsname $ 20;
  input dsname $ nobs;
  cards;
samp.class 5
sashelp.air 8
;
run;

/* Row k of cntr is stored in the macro variables dsn<k> and nobs<k>;
   npars receives the number of rows. The %DO loop then runs
   PROC CONTENTS and PROC PRINT once per row, using && to build the
   indexed references. */
%MACRO controled;
  data _null_;
    set cntr end=lastobs;
    length suffix $ 12;
    /* row number as left-aligned text without blanks */
    suffix = strip(put(_n_, best12.));
    call symputx(cats('dsn', suffix), dsname);
    call symputx(cats('nobs', suffix), nobs);
    if lastobs then call symputx('npars', _n_);
  run;

  %DO ipar=1 %TO &npars;
    title "Dataset &&dsn&ipar";
    proc contents data=&&dsn&ipar;
    run;
    proc print data=&&dsn&ipar(obs=&&nobs&ipar);
    run;
  %END;
  /* clear the title */
  title;
%MEND controled;

/* show macro variable resolution and the generated code in the log */
options symbolgen mprint;
%controled

/* The control data set stores both the name (varname) and the value
   (varval) of a macro variable. */
data cntr; 
  length varname varval $ 40;
  input varname $ varval $;
  cards;
dsn   samp.class
;
run;
/* Create the macro variable named in the row, and remember that name in
   the macro variable varname. &&&varname then resolves in two passes:
   first to a reference to the named variable, then to its value. */
%macro controled;
  data _null_;
    set cntr;
    call symputx(varname, varval);
    call symputx('varname', varname);
  run;
  %PUT varname is &varname;
  %PUT &varname stores &&&varname;
%mend controled;
%controled

/* The control data set stores several name/value pairs of macro variables. */
/* Author's note: the result is not ideal yet; neither &&&vars&i nor
   &&&&vars&i returns samp.class. The macro therefore copies the name into
   the helper variable tmpname and resolves &&&tmpname.
   NOTE(review): by the rescan rule (&& becomes &), a direct reference
   should need six ampersands, i.e. &&&&&&vars&i - confirm before relying
   on it. */
data cntr; 
  length varname varval $ 40;
  input varname $ varval $;
  cards;
dsn   samp.class
nobs  5
;
run;
/* For row k: create the macro variable named by varname with value
   varval, and store the name itself in vars<k>. nvars receives the
   number of rows. */
%macro controled;
  data _null_;
    set cntr end=lastobs;
    call symputx(varname, varval);
    call symputx('vars' ||
        trim(left(put(_n_, 8.))), varname);
    if lastobs then
        call symputx('nvars', _n_);
  run;
  %DO i=1 %TO &nvars;
    %PUT vars&i is &&vars&i;
    /* tmpname holds the name stored in vars<i>; &&&tmpname is its value */
    %LET tmpname=&&vars&i;
    %PUT &&vars&i stores &&&tmpname;
  %END;
%mend controled;
%controled

    
/* SELECT ... INTO example: store a one-row query result in macro variables */
PROC SQL NOPRINT;
  SELECT count(*),
         sum(amount) FORMAT=best5.
    INTO :nstudent,
         :total
    FROM samp.c9501bk;
QUIT;
%put &nstudent. &total.;

/* SELECT ... INTO example: one macro variable per row (a macro "array").
   The upper bound 9999 is only a limit; &sqlobs, read right after the
   query, gives the number of rows processed. */
PROC SQL NOPRINT;
  SELECT name, amount
    INTO :name1 - :name9999,
         :amount1 - :amount9999
    FROM samp.c9501bk;
  %let nstudents=&sqlobs;
QUIT;
%put &nstudents;
%put &name3 &amount3;

/* SELECT ... INTO example: all values of a column in one macro variable,
   separated by blanks */
PROC SQL NOPRINT;
  SELECT name, amount
    INTO :names   SEPARATED BY ' ',
         :amounts SEPARATED BY ' '
    FROM samp.c9501bk;
QUIT;
%put &names --- &amounts;

/* Use the macro variable "array" name1, name2, ...: loop from 1 to
   &nstudents and resolve &&name&ii in two passes. Both nstudents and
   the name<k> variables are expected to exist already (they are created
   by the SELECT INTO example). */
%macro ma;
  %do ii=1 %to &nstudents.;
    %put 第&ii.个学生是：&&name&ii.;
  %end;
%mend ma;
%ma


/* Use a macro variable that holds a delimited list of values: print
 * every item of the list together with its position.
 *
 * Parameter:
 *   vars - the list; items are separated by the default %SCAN delimiters.
 *
 * The loop ends at the first empty item. Items are fetched with %QSCAN
 * and tested with %LENGTH, so an item that looks like an operator or an
 * expression (for example OR, or text containing - or +) is not
 * evaluated by the loop condition.
 */
%macro vl(vars);
  /* keep the loop variables out of the global symbol table */
  %local ivar val;
  %let ivar=1;
  %let val=%qscan(&vars., &ivar.);
  %do %while(%length(&val.) > 0);
    %put NO.&ivar.  &val;
    %let ivar=%eval(&ivar. + 1);
    %let val=%qscan(&vars., &ivar.);
  %end;
%mend;
%vl(&names)

/* Macro "array" example using && and &&&.
   v1..v3 hold the names x, y, z, and x, y, z hold the values.
   &&v&ii resolves to the name stored in v<ii>; &&&cv resolves to the
   value of the variable whose name is stored in cv. */
%macro test;
%LET v1=x;
%LET v2=y;
%LET v3=z;
%LET nv=3;
%LET x=11;
%LET y=12;
%LET z=13;
%DO ii=1 %TO &nv.;
  %LET cv=&&v&ii;
  %PUT &&&cv;
%END;
%mend test;

%test



/******************************************/
/*             宏应用实例                  */
/******************************************/

/* Macro application: run several procedures per group in a macro loop.
 * For the class data set, split by sex, compute simple statistics for
 * each group with both PROC MEANS and PROC TABULATE.
 * With a BY statement each procedure would print its two groups together;
 * here the two analyses of one group should appear together instead.
 */
%macro exa;
  /* settings: grouping variable, input data set, analysis variables,
     RTF output file */
  %let vname=sex;
  %let dsn=samp.class;
  %let vars=height weight;
  %let outrtf=testout.rtf;
  
  /* distinct values of the grouping variable go to disv1, disv2, ... */
  proc sql noprint;
    select distinct &vname.
      into :disv1-:disv999
      from &dsn.;
  quit;
  /* number of distinct values found by the query */
  %let ndisv=&sqlobs.;

  ods rtf file="&outrtf."
      startpage=no bodytitle;
  ods noproctitle;
  /* one PROC MEANS and one PROC TABULATE per group value; the WHERE
     clause compares against a quoted value, so the grouping variable is
     treated as character */
  %do i=1 %to &ndisv.;
    title "分组：&vname.=&&disv&i";
    proc means data=&dsn mean std;
      where &vname="&&disv&i";
      var &vars.;
    run;
    title;
    proc tabulate data=&dsn;
      where &vname="&&disv&i";
      var &vars.;
      table &vars., mean std;
    run;
  %end;
  ods rtf close;
%mend exa;
%exa

/* Macro application: split a data set by the values of a grouping variable.
 * Creates the data sets subd1 and subd2, holding the female and the male
 * observations respectively.
 */
%macro exa;
  %let dsn=samp.class;
  %let vname=sex;
  
  /* distinct values of the grouping variable go to disv1, disv2, ... */
  proc sql noprint;
    select distinct &vname.
      into :disv1-:disv999
      from &dsn.;
  quit;
  %let ndisv=&sqlobs.;

  /* The first loop generates the list of output data sets; the second
     semicolon ends the DATA statement itself. */
  data %do i=1 %to &ndisv.; subd&i %end;;
    set &dsn;
    /* Generates an IF / ELSE IF chain: for every value after the first,
       the word else is emitted in front of the IF statement. */
    %do i=1 %to &ndisv.;
      %if &i>1 %then else;
      if &vname="&&disv&i" then output subd&i;
    %end;
  run;
%mend exa;
%exa


/* Read the contents of every file named in an index file.
 *
 * Parameter:
 *   indexf - path of a text file with one file name per line.
 *
 * Creates the data set index (one row per file name, variable fname of
 * length 20) and, for the k-th file, the data set new<k> with the numeric
 * variables x and y. The data files are read from the directory test\.
 */
%macro readb(indexf);
  /* Local working variables. nfiles starts at 0 so that an empty index
     file simply skips the loop instead of failing on an undefined (or
     stale) macro variable. */
  %local ii nfiles fname;
  %let nfiles=0;

  data index;
    length fname $ 20;
    infile "&indexf";
    input fname $;
  run;
  proc print;run;

  /* number of file names = row number of the last observation */
  data _null_;
     set index end=lastline;
     if lastline then 
       call symputx('nfiles', _n_);
  run;

  %DO ii=1 %TO &nfiles;
    /* fetch the ii-th file name into the macro variable fname */
    data _null_;
      set index(firstobs=&ii obs=&ii);
      call symputx('fname', fname);
    run;

    data new&ii;
      infile "test\&fname";
      input x y;
    run;
  %END;
%mend readb;

%readb(test\flist.txt)


    
**************************************************;
**************************************************;
**************************************************;
**************************************************;
**************************************************;

/******************************************/
/*              PROC SQL例子              */
/******************************************/

/* PROC SQL example: all rows and all columns */
PROC SQL;
  SELECT *
  FROM samp.c9501;
QUIT;

/* PROC SQL example: subset of columns */
PROC SQL;
  SELECT name, math
  FROM samp.c9501;
QUIT;

/* PROC SQL example: subset of rows */
PROC SQL;
  SELECT name, math
  FROM samp.c9501
  WHERE sex = '男';
QUIT;

/* PROC SQL example: the DISTINCT keyword */
PROC SQL;
  SELECT DISTINCT sex
  FROM samp.c9501;
QUIT;

/* PROC SQL example: defining a new column */
PROC SQL;
  SELECT name,
         math + chinese AS total
  FROM samp.c9501;
QUIT;
/* CALCULATED lets the WHERE clause refer to the new column */
PROC SQL;
  SELECT name,
         math + chinese AS total
  FROM samp.c9501
  WHERE CALCULATED total >= 200;
QUIT;

/* PROC SQL example: ORDER BY clause (descending) */
PROC SQL;
  SELECT name, math
  FROM samp.c9501
  WHERE sex = '男'
  ORDER BY math DESC;
QUIT;

/* PROC SQL example: grouped summary with GROUP BY */
PROC SQL;
  SELECT sex,
         mean(math) AS mm
  FROM samp.c9501
  GROUP BY sex;
QUIT;

/* PROC SQL example: select groups with GROUP BY and HAVING */
PROC SQL;
  SELECT sex,
         mean(math) AS mm
  FROM samp.c9501
  GROUP BY sex
  HAVING mean(chinese) >= 100;
QUIT;

/* HAVING on the summary column alias; this query reads c9501 without a
   libref */
PROC SQL;
  SELECT sex,
         mean(math) AS mm
  FROM c9501
  GROUP BY sex
  HAVING mm >= 90;
QUIT;

/* PROC SQL example: save a query result as a data set, then list it */
PROC SQL;
  CREATE TABLE subd AS
    SELECT name, math
    FROM samp.c9501
    WHERE sex = '男'
    ORDER BY math DESC;
  SELECT *
  FROM subd;
QUIT;

/* PROC SQL example: save a query as a view and print it */
PROC SQL;
  CREATE VIEW totd AS
    SELECT name,
           math + chinese AS total
    FROM samp.c9501;
QUIT;
PROC PRINT DATA=totd;
RUN;

/* View example: the view oview is defined once on the table orig; orig
   is then re-created with different values and the same view is printed
   again. */
data orig;
  input name $ x1 x2;
  cards;
A 1 2
B 3 4
;
run;
proc sql;
  create view oview as
    select name, x1+x2 as tot
      from orig;
quit;
proc print data=oview;run;
/* replace the underlying table; the view definition is left unchanged */
data orig;
  input name $ x1 x2;
  cards;
A -1 -2
B -3 -4
;
run;
proc print data=oview;run;

/* PROC SQL example: one-to-one join.
   First build the two tables to be joined from samp.c9501. */
PROC SQL;
  CREATE TABLE c9501x AS
    SELECT name, sex
    FROM samp.c9501;
  CREATE TABLE c9501y AS
    SELECT name, math, chinese
    FROM samp.c9501;
QUIT;

/* Join on name, keep the rows with sex = '男', highest math score first */
PROC SQL;
  SELECT a.name, math
  FROM c9501x AS a,
       c9501y AS b
  WHERE a.name = b.name
    AND sex = '男'
  ORDER BY math DESC;
QUIT;

/* The same one-to-one join as a DATA step merge that keeps only
   matching rows */
proc sort data=c9501x;
  by name;
run;
proc sort data=c9501y;
  by name;
run;
data new;
  merge c9501x(in=_a) c9501y(in=_b);
  by name;
  /* subsetting IF: the row must come from both inputs */
  if _a and _b;
run;

/* PROC SQL example: one-to-many join.
   d1 has one row per id; d2 has two rows for id a and none for id b. */
data d1;
  input id $ x;
  cards;
a 11
b 12
;
data d2;
  input id $ y @@;
  cards;
a 21
a 22
;
proc sql;
  select a.id, x, y
    from d1 AS a, d2 AS b
    where a.id=b.id;
quit;


/* PROC SQL example: Cartesian product. Without a WHERE clause every
   row of d1 is paired with every row of d2. The trailing @@ lets one
   data line supply several observations. */
data d1;
  input x @@; cards;
11 12
;
data d2;
  input y @@; cards;
21 22
;
proc sql;
  select x, y from d1, d2;
quit;

/* PROC SQL example: many-to-many join. id a occurs twice in d3 and
   three times in d4. */
data d3;
  input id $ x;
  cards;
a 11
b 12
a 13
;
run;
data d4;
  input id $ y;
  cards;
a 21
a 22
b 23
a 24
;
run;
proc sql;
  select a.id, x, y
    from d3 AS a, d4 AS b
    where a.id=b.id;
quit;

/* The same inputs combined with a DATA step MERGE. Author's note: group a
   yields only 3 rows here, unlike the SQL join above. */
proc sort data=d3; by id;
proc sort data=d4; by id;
data new;
  merge d3 d4;
  by id;
run;
proc print;run;       

/* PROC SQL example: one-to-one join in which unmatched rows are dropped.
   id b exists only in d5 and id c only in d6. */
data d5;
  input id $ x;
  cards;
a 11
b 12
;
data d6;
  input id $ y;
  cards;
a 21
c 23
;
run;
proc sql;
  select d5.id, x, y
    from d5, d6
    where d5.id=d6.id;
quit;

/* PROC SQL example: LEFT JOIN */
PROC SQL;
  SELECT a.id, x, y
  FROM d5 AS a
       LEFT JOIN d6 AS b
       ON a.id = b.id;
QUIT;

/* PROC SQL example: RIGHT JOIN */
PROC SQL;
  SELECT d5.id, x, y
  FROM d5
       RIGHT JOIN d6
       ON d5.id = d6.id;
QUIT;

/* PROC SQL example: FULL OUTER JOIN */
PROC SQL;
  SELECT d5.id, x, y
  FROM d5
       FULL OUTER JOIN d6
       ON d5.id = d6.id;
QUIT;
/* COALESCE takes the id from whichever side supplies one */
PROC SQL;
  SELECT COALESCE(d5.id, d6.id) AS id, x, y
  FROM d5
       FULL OUTER JOIN d6
       ON d5.id = d6.id;
QUIT;

/* IN condition with a list of constants */
PROC SQL;
  SELECT *
  FROM samp.class
  WHERE age IN (15, 16);
QUIT;

/* IN condition with a subquery: students whose sex group has a mean
   chinese score of at least 100 */
PROC SQL;
  SELECT name, math
  FROM c9501 AS a
  WHERE sex IN (
    SELECT sex
    FROM c9501 AS b
    GROUP BY sex
    HAVING mean(chinese) >= 100
  );
QUIT;

/* IN condition with a subquery: rows whose age group has a mean height
   of at least 65 */
PROC SQL;
  SELECT *
  FROM samp.class
  WHERE age IN (
    SELECT age
    FROM samp.class
    GROUP BY age
    HAVING mean(height) >= 65
  );
QUIT;

/* Side-by-side merge with PROC SQL that is not one-to-one.
   The in-line view adds the 0/1 column good (math>=85 and chinese>=100)
   to samp.c9501; its rows are then matched to the table te on good.
   NOTE(review): te is not created in this part of the file - it is
   assumed to exist with a column named good. */
PROC SQL;
  SELECT *
    FROM 
     (SELECT *, math>=85 AND chinese>=100 AS good
        FROM samp.c9501) AS a, te
    WHERE a.good=te.good;
QUIT;


/* Find people who share a birthday, using PROC SQL.
   cb holds name (columns 1-8) and birth, read with the yymmdd10.
   informat and displayed with the yymmdd10. format. */
title '找出生日相同的人: PROC SQL';
data cb;
  input name $ 1-8 birth :yymmdd10.;
  format birth yymmdd10.;
  label name='姓名'  birth='生日';
  cards;
李明      78-6-1
王思明    78-5-19
张聪      78-6-1
刘颖      78-10-18
张红艺    79-5-19
;
run;
/* Correlated subquery: keep a row when its birth date also occurs on a
   row with a different name. */
proc sql;
  select name, birth
    from cb a
    where birth in (
      select birth
      from cb b
      where b.name ^= a.name)
    order by a.birth;
run;quit;

/* Same question answered with a self join: pair every row of cb with
   the rows of another person that have the same birth date. */
PROC SQL;
  SELECT a.name, a.birth
  FROM cb AS a,
       cb AS b
  WHERE a.birth = b.birth
    AND a.name NE b.name
  ORDER BY a.birth;
run;QUIT;

/* Save the query result to a data set with CREATE TABLE.
   bsame receives the rows of cb whose birth date also occurs on a row
   with a different name, ordered by birth so that PROC PRINT can use
   BY birth. */
proc sql;
  /* The subquery after IN must be enclosed in parentheses. */
  CREATE TABLE bsame AS
    select name, birth
      from cb a
      where birth in (
        select birth
          from cb b
          where b.name ^= a.name)
      order by a.birth;
quit;
proc print data=bsame label;
  id name;
  by birth;
run;

/* Treat two people as sharing a birthday when month and day agree,
   whatever the year; the comparison is computed in the WHERE clause. */
PROC SQL;
  SELECT a.name, a.birth
  FROM cb AS a,
       cb AS b
  WHERE a.name NE b.name
    AND month(a.birth) = month(b.birth)
    AND day(a.birth) = day(b.birth)
  ORDER BY month(a.birth), day(a.birth);
run;QUIT;

/* Same month and day counts as the same birthday; this version uses
   CREATE VIEW and DROP VIEW.
   The view adds md = month*100 + day, a single key for month and day;
   the query keeps a row when its md also occurs on a row with a
   different name. The view is dropped afterwards. */
proc sql;
  create view datamd as
    select name, birth,
      month(birth)*100+day(birth) AS md
      from cb;
  select name, birth
    from datamd a
    where md in (
      select md
        from datamd b
        where b.name ^= a.name)
    order by a.md;
  drop view datamd;
run;quit;

/* Find shared birthdays with PROC FREQ and a DATA step: count the rows
   per birth date, merge the counts back onto cb and keep the dates that
   occur more than once. */
title '找出生日相同的人: PROC FREQ';
proc freq data=cb noprint;
  tables birth / out=bfreq;
run;

proc sort data=cb;
  by birth;
run;
proc sort data=bfreq;
  by birth;
run;

data bsame;
  merge cb bfreq;
  by birth;
  /* count comes from the PROC FREQ output data set */
  if count > 1 then output;
run;

proc print data=bsame label noobs;
  by birth;
  id birth;
  var name;
run;

/* remove the work data sets created above */
proc datasets library=work nolist;
  delete bfreq bsame;
run;


/* Find shared birthdays with a DATA step only, using RETAIN.
   When three people share a birthday, rows are output more than once.
 */
title '找出生日相同的人: 数据步';
proc sort data=cb;
  by birth;
run;
proc print;run;

data b;
  length name1 $12;
  set cb;
  retain birth1 name1;
  /* birth1 and name1 keep the previous observation */
  IF birth=birth1 THEN DO;
    /* save the current row, output the previous row, then restore and
       output the current row */
    name2=name; birth2=birth;
    name=name1; birth=birth1;
    output;
    name=name2; birth=birth2;
    output;
    END;
  /* remember the current row for the next iteration */
  name1=name;
  birth1=birth;
  keep name birth;
run;
proc print;
run;

/* Find shared birthdays with a DATA step only, using the FIRST. and
   LAST. flags of BY groups: a row that is both the first and the last of
   its birth group is the only one with that date and is left out. */
proc sort data=cb;
  by birth;
run;

data c;
  set cb;
  by birth;
  if not (first.birth and last.birth);
run;

proc print;
run;


/* Payroll example: employee number (IdNumber), sex (Sex), job code
 * (Jobcode), salary (Salary), date of birth (Birth) and hire date (Hired).
 * Used by the SQL examples.
 * The first four fields are read by column position; the two dates are
 * read with the date7. informat at columns 18 and 26 and displayed with
 * mmddyy8.
 */
data payroll;
   input IdNumber $ 1-4 Sex $ 6 Jobcode $ 8-10
         Salary 12-16 @18 Birth date7. 
         @26 Hired date7.;
   format birth hired mmddyy8.;
   datalines;    
1009 M TA1 28880 02MAR59 26MAR92
1017 M TA3 40858 28DEC57 16OCT81
1036 F TA3 39392 19MAY65 23OCT84
1037 F TA1 28558 10APR64 13SEP92
1038 F TA1 26533 09NOV69 23NOV91
1050 M ME2 35167 14JUL63 24AUG86
1065 M ME2 35090 26JAN44 07JAN87
1076 M PT1 66558 14OCT55 03OCT91
1094 M FA1 22268 02APR70 17APR91
1100 M BCK 25004 01DEC60 07MAY88
;
run;
/* PROC SQL query with grouped averages: per job code, the number of
   employees, the average age in whole years (as of today) and the
   average salary; only job codes with an average age of at least 30
   are listed. */
proc sql;
   /* PROC SQL runs each statement as soon as it is complete, so the
      titles must be set before the SELECT they are meant for. */
   title1 'Summary Information for Each Job Category';
   title2 'Average Age 30 or Over';
   select Jobcode,
          count(jobcode) as number label='Number',
          avg(int((today()-birth)/365.25)) as avgage
             format=2. label='Average Age',
          avg(salary) as avgsal
             format=dollar8. label='Average Salary'
      from payroll
      group by jobcode
      having avgage ge 30;
quit;

/* Save a query result to a data set with CREATE TABLE: bonus holds the
   employee number, the salary and a bonus of 2.5 percent of the salary.
   The table is then listed under its own title, and the title is
   cleared at the end. */
PROC SQL;
  CREATE TABLE bonus AS
    SELECT IdNumber,
           Salary FORMAT=dollar8.,
           salary * 0.025 AS Bonus FORMAT=dollar8.
    FROM payroll;
  TITLE 'BONUS Information';
  SELECT *
  FROM bonus;
QUIT;
TITLE;


/* OilProd: crude oil production, with Country and barrels produced per
   day (BarrelsPerDay).
   Country is read from columns 1-24; the number starts at column 26 and
   is read with the comma9. informat, which accepts embedded commas. */
data OilProd;
   input Country $ 1-24 @26 BarrelsPerDay comma9.;
   format barrelsperday comma9.;
   datalines;
Algeria                  1,400,000
Canada                   2,500,000
China                    3,000,000
Egypt                      900,000
Indonesia                1,500,000
Iran                     4,000,000
Iraq                       600,000
Kuwait                   2,500,000
Libya                    1,500,000
Mexico                   3,400,000
Nigeria                  2,000,000
Norway                   3,500,000
Oman                       900,000
Saudi Arabia             9,000,000
United States of America 8,000,000
United Arab Emirates     2,000,000
United Kingdom           3,000,000
Venezuela                3,000,000
USSR (former)            7,000,000
;
run;

/* OilRsrvs: crude oil reserves, with Country and the reserves in
   barrels (Barrels).
   Country is read from columns 1-24; the number starts at column 26 and
   is read with the comma15. informat, which accepts embedded commas. */
data OilRsrvs;
   input Country $ 1-24 @26 Barrels comma15.;
   format barrels comma15.;
   datalines;
Algeria                    9,200,000,000
Canada                     7,000,000,000
China                     25,000,000,000
Egypt                      4,000,000,000
Gabon                      1,000,000,000
Indonesia                  5,000,000,000
Iran                      90,000,000,000
Iraq                     110,000,000,000
Kuwait                    95,000,000,000
Libya                     30,000,000,000
Mexico                    50,000,000,000
Nigeria                   16,000,000,000
Norway                    11,000,000,000
Saudi Arabia             260,000,000,000
United Arab Emirates         100,000,000
United Kingdom             4,500,000,000
United States of America  30,000,000,000
Venezuela                 65,000,000,000
USSR (Former)             65,500,000,000
;
run;

/* INNER JOIN example: production and reserves per country, largest
   producer first. Country names are compared exactly, so a country whose
   spelling or letter case differs between the two tables (the USSR rows
   differ in the case of the word former) is not matched. */
PROC SQL;
  TITLE 'Oil Production/Reserves of Countries';
  SELECT p.country,
         barrelsperday LABEL='Production',
         barrels LABEL='Reserves'
  FROM oilprod AS p,
       oilrsrvs AS r
  WHERE p.country = r.country
  ORDER BY barrelsperday DESC;
QUIT;

/* WorldCityCoords: coordinates of major world cities, with the variables
 * City (city name), Country (country), Latitude and Longitude.
 * All four fields are read by fixed column position, so the data lines
 * must keep their alignment.
 */
data worldcitycoords;                                   
   input City $ 1-25 Country $ 28-46 Latitude 48-50     
         Longitude 53-56;                               
   datalines;                                              
Kabul                      Afghanistan          35    69
Algiers                    Algeria              37     3
Buenos Aires               Argentina           -34   -59
Cordoba                    Argentina           -31   -64
Tucuman                    Argentina           -27   -65
Adelaide                   Australia           -35   138
Alice Springs              Australia           -24   134
Brisbane                   Australia           -27   153
Darwin                     Australia           -12   131
Melbourne                  Australia           -38   145
Perth                      Australia           -32   116
Sydney                     Australia           -34   151
Vienna                     Austria              48    16
Nassau                     Bahamas              26   -77
Chittagong                 Bangladesh           22    92
Brussels                   Belgium              51     4
Belize                     Belize               17   -88
Kindley AFB                Bermuda              33   -65
La Paz                     Bolivia             -16   -69
Belem                      Brazil               -1   -48
Belo Horizonte             Brazil              -20   -44
Brasilia                   Brazil              -16   -48
Curitiba                   Brazil              -25   -49
Fortaleza                  Brazil               -4   -38
Porto Alegre               Brazil              -30   -51
Recife                     Brazil               -9   -35
Rio de Janeiro             Brazil              -23   -43
Salvador                   Brazil              -13   -38
Sao Paulo                  Brazil              -23   -46
Sofia                      Bulgaria             43    23
Phnom Penh                 Cambodia             11   105
Calgary                    Canada               51  -114
Havre                      Canada               48  -110
Kingston                   Canada               44   -76
London                     Canada               43   -81
Moose Jaw                  Canada               50  -105
Montreal                   Canada               45   -73
Ottawa                     Canada               45   -76
Port Arthur                Canada               48   -89
Quebec                     Canada               47   -71
St. John                   Canada               45   -66
Toronto                    Canada               44   -79
Victoria                   Canada               48  -123
Winnipeg                   Canada               50   -98
Punta Arenas               Chile               -53   -71
Santiago                   Chile               -33   -71
Valparaiso                 Chile               -33   -71
Chongquing                 China                29   106
Shanghai                   China                31   121
Baranquilla                Colombia             11   -75
Bogota                     Colombia              4   -75
Cali                       Colombia              3   -76
Medellin                   Colombia              6   -75
Brazzaville                Congo                -4    15
Guantanamo Bay             Cuba                 20   -76
Havana                     Cuba                 24   -82
Prague                     Czech Republic       51    14
Copenhagen                 Denmark              56    12
Santo Domingo              Dominican Republic   18   -70
Cairo                      Egypt                30    31
San Salvador               El Salvador          14   -89
Guayaquil                  Ecuador             -21   -80
Quito                      Ecuador               0   -78
Addis Ababa                Ethiopia              9    39
Asmara                     Ethiopia             15    39
Helsinki                   Finland              60    25
Lyon                       France               46     5
Marseilles                 France               43     5
Nantes                     France               47    -1
Nice                       France               44     7
Paris                      France               49     2
Strasbourg                 France               48     8
Cayenne                    French Guiana         5   -52
Berlin                     Germany              52    13
Hamburg                    Germany              53    10
Hannover                   Germany              52    10
Mannheim                   Germany              49     8
Munich                     Germany              49    11
Accra                      Ghana                 5     0
Gibraltar                  Gibraltar            37    -5
Athens                     Greece               38    24
Thessaloniki               Greece               40    23
Guatemala City             Guatemala            14   -90
Georgetown                 Guyana                7   -58
Port Au Prince             Haiti                18   -72
Tegucigalpa                Honduras             15   -87
Hong Kong                  Hong Kong            22   114
Budapest                   Hungary              47    19
Reykjavik                  Iceland              65    22
Ahmenabad                  India                22    72
Bangalore                  India                13    77
Bombay                     India                19    73
Calcutta                   India                22    88
Madras                     India                14    80
Nagpur                     India                22    80
New Delhi                  India                28    77
Djakarta                   Indonesia            -6   107
Kupang                     Indonesia           -10   123
Makassar                   Indonesia            -6   119
Medan                      Indonesia             3    99
Palembang                  Indonesia            -3   105
Surabaya                   Indonesia            -7   113
Abadan                     Iran                 30    48
Meshed                     Iran                 36    59
Tehran                     Iran                 36    51
Baghdad                    Iraq                 33    44
Mosul                      Iraq                 36    44
Dublin                     Ireland              53    -6
Shannon                    Ireland              53    -9
Jerusalem                  Israel               32    35
Tel Aviv                   Israel               33    35
Milan                      Italy                45     9
Naples                     Italy                41    14
Rome                       Italy                42    12
Fukuoka                    Japan                33   130
Sapporo                    Japan                44   141
Tokyo                      Japan                36   140
Amman                      Jordan               32    36
Nairobi                    Kenya                -1    37
Pyongyang                  Korea, North         39   126
Seoul                      Korea, South         37   127
Beirut                     Lebanon              34    35
Monrovia                   Liberia               6   -11
Benghazi                   Libya                33    21
Tananarive                 Madagascar          -19    47
Kuala Lumpur               Malaysia              4   102
Penang                     Malaysia              5   100
Guadalajara                Mexico               21  -103
Merida                     Mexico               21   -89
Mexico City                Mexico               19   -99
Monterrey                  Mexico               26  -100
Vera Cruz                  Mexico               19   -97
Casablanca                 Morocco              33    -7
Katmandu                   Nepal                28    85
Amsterdam                  Netherlands          52     5
Auckland                   New Zealand         -37   175
Christchurch               New Zealand         -43   172
Wellington                 New Zealand         -41   175
Managua                    Nicaragua            12   -86
Lagos                      Nigeria               6     3
Bergen                     Norway               60     5
Oslo                       Norway               60    11
Karachi                    Pakistan             25    67
Lahore                     Pakistan             31    74
Peshwar                    Pakistan             34    71
Panama City                Panama                9   -79
Port Moresby               Papua New Guinea     -9   148
Ascuncion                  Paraguay            -25   -57
Lima                       Peru                -13   -77
Manila                     Philippines          14   121
Krakow                     Poland               51    20
Warsaw                     Poland               52    21
Lisbon                     Portugal             39   -10
San Juan                   Puerto Rico          18   -67
Bucharest                  Romania              44    27
Kiev                       Russia               50    30
Leningrad                  Russia               60    30
Minsk                      Russia               54    27
Moscow                     Russia               56    38
Odessa                     Russia               46    31
Tashkent                   Russia               41    69
Tbilisi                    Russia               42    45
Vladivostok                Russia               44   132
Volgograd                  Russia               49    44
Dhahran                    Saudi Arabia         26    51
Jedda                      Saudi Arabia         21    39
Riyadh                     Saudi Arabia         24    47
Dakar                      Senegal              15   -17
Singapore                  Singapore             1   104
Mogadiscio                 Somalia               2    49
Cape Town                  South Africa        -34    18
Johannesburg               South Africa        -26    28
Pretoria                   South Africa        -26    28
Aden                       Yemen                13    45
Barcelona                  Spain                41     3
Madrid                     Spain                40    -4
Valencia                   Spain                39     0
Colombo                    Sri Lanka             7    80
Khartoum                   Sudan                15    32
Paramaribo                 Suriname              6   -56
Stockholm                  Sweden               59    19
Zurich                     Switzerland          47     8
Damascus                   Syria                33    36
Tainan                     Taiwan               23   120
Taipei                     Taiwan               25   121
Dar es Salaam              Tanzania             -7    39
Bangkok                    Thailand             14   100
Port of Spain              Trinidad and Tobago  11   -61
Tunis                      Tunisia              37    10
Adana                      Turkey               37    35
Ankara                     Turkey               40    33
Istanbul                   Turkey               41    29
Izmir                      Turkey               38    27
Belfast                    Northern Ireland     54    -6
Birmingham                 England              52    -2
Cardiff                    Wales                51    -3
Edinburgh                  Scotland             56    -3
Glasgow                    Scotland             56    -4
London                     England              51     0
Montevideo                 Uruguay             -35   -56
Caracas                    Venezuela            10   -67
Maracaibo                  Venezuela            10   -71
Da Nang                    Vietnam              17   108
Hanoi                      Vietnam              21   106
Ho Chi Minh City (Saigon)  Vietnam              11   107
Belgrade                   Yugoslavia           45    20
Acapulco                   Mexico               17  -100
Beijing                    China                40   116
San Jose                   Costa Rica           10   -85
Hamilton                   Bermuda              32   -65
Vancouver                  Canada               49  -124
Kingston                   Jamaica              18   -77
;
run;

/* COUNTRIES data set: basic facts about each country.
 * Each in-stream record is read with column input:
 *   Name        cols   1-35   country name
 *   Capital     cols  37-55   capital city
 *   Population  cols  57-65   population
 *   Area        cols  67-75   area
 *   Continent   cols  77-107  continent the country belongs to
 *   UNDate      cols 109-112  year the country joined the United Nations
 */
data Countries;
   input
      Name       $   1-35
      Capital    $  37-55
      Population    57-65
      Area          67-75
      Continent  $  77-107
      UNDate       109-112
   ;
   datalines;
Afghanistan                         Kabul                17070323    251825 Asia                            1946
Albania                             Tirane                3407400     11100 Europe                          1955
Algeria                             Algiers              28171132    919595 Africa                          1962
Andorra                             Andorra la Vella        64634       200 Europe                          1993
Angola                              Luanda                9901050    481300 Africa                          1976
Antigua and Barbuda                 St. John's              65644       171 Central America and Caribbean   1981
Argentina                           Buenos Aires         34248705   1073518 South America                   1945
Armenia                             Yerevan               3556864     11500 Asia                            1992
Australia                           Canberra             18255944   2966200 Australia                       1945
Austria                             Vienna                8033746     32400 Europe                          1955
Azerbaijan                          Baku                  7760064     33400 Asia                            1992
Bahamas                             Nassau                 275703      5400 Central America and Caribbean   1973
Bahrain                             Manama                 591800       300 Asia                            1971
Bangladesh                          Dhaka                1.2639E8     57300 Asia                            1974
Barbados                            Bridgetown             258534       200 Central America and Caribbean   1966
Belarus                             Minsk                10508000     80100 Europe                          1945
Belgium                             Brussels             10162614     11800 Europe                          1945
Belize                              Belmopan               211069      8900 Central America and Caribbean   1981
Benin                               Porto Novo            5394881     43500 Africa                          1960
Bermuda                             Hamilton                60594       100                                    .
Bhutan                              Thimphu               1756214     18100 Asia                            1971
Bolivia                             La Paz                7795410    424200 South America                   1945
Bosnia and Herzegovina              Sarajevo              4697040     19700 Europe                          1992
Botswana                            Gaborone              1372453    224600 Africa                          1966
Brazil                              Brasilia             1.6031E8   3286500 South America                   1945
Brunei                              Bandar Seri Begawan    287822      2200 Asia                            1984
Bulgaria                            Sofia                 8887111     42900 Europe                          1955
Burkina Faso                        Ouagodougou          10235326    105900 Africa                          1960
Burundi                             Bujumbura             6185632     10700 Africa                          1962
Cambodia                            Phnom Penh           10366614     70200 Asia                            1955
Cameroon                            Yaounde              13261994    183600 Africa                          1960
Canada                              Ottawa               28392302   3849674 North America                   1945
Cape Verde                          Praia                  427188      1600 Africa                             .
Cayman Islands                      Georgetown              23228       100 Central America and Caribbean      .
Central African  Republic           Bangui                3173103    240300 Africa                          1960
Chad                                N'Djamena             5521118    495800 Africa                          1960
Channel Islands                                            146436       100 Europe                             .
Chile                               Santiago             14089101    292100 South America                   1945
China                               Beijing              1.2022E9   3696100 Asia                            1945
Colombia                            Bogota               35930188    440800 South America                   1945
Comoros                             Moroni                 535246       700 Africa                          1975
Congo                               Brazzaville           2471223    132000 Africa                          1960
Congo, Democratic Republic of       Kinshasa             43106529    905400 Africa                          1960
Costa Rica                          San Jose              3375083     19700 Central America and Caribbean   1945
Cote D'Ivoire                       Yamoussoukro         14437516    124500 Africa                          1960
Croatia                             Zagreb                4744505     21800 Europe                          1992
Cuba                                Havana               11173523     42800 Central America and Caribbean   1945
Cyprus                              Nicosia                737226      3600 Asia                            1960
Czech Republic                      Prague               10511029     30400 Europe                          1993
Denmark                             Copenhagen            5239356     16600 Europe                          1945
Djibouti                            Djibouti               417089      8900 Africa                          1977
Dominica                            Roseau                  88871       300 Central America and Caribbean   1978
Dominican Republic                  Santo Domingo         7903469     18700 Central America and Caribbean   1945
Ecuador                             Quito                10782691    105000 South America                   1945
Egypt                               Cairo                59912259    385200 Africa                          1945
El Salvador                         San Salvador          5809949      8100 Central America and Caribbean   1945
England                             London               49293170     50400 Europe                          1945
Equatorial Guinea                   Malabo                 414059     10800 Africa                          1968
Eritrea                             Asmera                3231677     45300 Africa                          1993
Estonia                             Tallinn               1633006     17400 Europe                          1991
Ethiopia                            Addis Ababa          59291170    437800 Africa                          1945
Fiji                                Suva                   771563      7100 Oceania                         1970
Finland                             Helsinki              5119178    130600 Europe                          1955
France                              Paris                58412558    210000 Europe                          1945
French Guiana                       Cayenne                102000     43700 South America                      .
Gabon                               Libreville            1150275    103300 Africa                          1960
Gambia (The)                        Banjul                 968493      4100 Africa                          1965
Georgia, Republic of                Tbilisi               5737236     26900 Asia                            1992
Germany                             Berlin               81890690    137700 Europe                          1973
Ghana                               Accra                17395511     92100 Africa                          1957
Gibraltar                           Gibraltar               30297       100 Europe                             .
Greece                              Athens               10669583     51000 Europe                          1945
Grenada                             St. George's            94931       100 Central America and Caribbean   1974
Guatemala                           Guatemala City       10827127     42000 Central America and Caribbean   1945
Guinea                              Conakry               6455275     94900 Africa                          1958
Guinea-Bissau                       Bissau                1108869     13900 Africa                          1974
Guyana                              Georgetown             736216     83000 South America                   1966
Haiti                               Port-au-Prince        6555255     10700 Central America and Caribbean   1945
Honduras                            Tegucigalpa           5367613     43300 Central America and Caribbean   1945
Hong Kong                           Victoria              5857414       400 Asia                               .
Hungary                             Budapest             10421148     35900 Europe                          1955
Iceland                             Reykjavik              266614     36700                                 1946
India                               New Delhi            9.2901E8   1222600 Asia                            1945
Indonesia                           Jakarta              2.0239E8    741100 Asia                            1950
Iran                                Tehran               66261493    632500 Asia                            1945
Iraq                                Baghdad              20086891    168000 Asia                            1945
Ireland                             Dublin                3574032     27100 Europe                          1955
Isle of Man                         Douglas                 70693       200 Europe                             .
Israel                              Jerusalem             5101000      8000 Asia                            1949
Italy                               Rome                 58713508    116300 Europe                          1955
Jamaica                             Kingston              2580291      4200 Central America and Caribbean   1962
Japan                               Tokyo                1.2635E8    145900 Asia                            1956
Jordan                              Amman                 4000210     34300 Asia                            1955
Kalaallit Nunaat                    Nuuk                    57564    840000                                    .
Kazakhstan                          Almaty               17438936   1049200 Asia                            1992
Kenya                               Nairobi              28520558    225000 Africa                          1963
Kiribati                            Tarawa                  78772       300 Oceania                            .
Korea, North                        Pyongyang            23295340     47400 Asia                            1991
Korea, South                        Seoul                45529277     38300 Asia                            1991
Kuwait                              Kuwait City           1837006      6900 Asia                            1963
Kyrgyzstan                          Bishkek               4744505     76600 Asia                            1992
Laos                                Vientiane             4748545     91400 Asia                            1955
Latvia                              Riga                  2776212     24900 Europe                          1991
Lebanon                             Beirut                3655834      3900 Asia                            1945
Leeward Islands                     Plymouth                12119       100 Central America and Caribbean      .
Lesotho                             Maseru                1963244     11700 Africa                          1966
Liberia                             Monrovia              3002430     38200 Africa                          1945
Libya                               Tripoli               5107059    679400 Africa                          1955
Liechtenstein                       Vaduz                   30297       100 Europe                          1990
Lithuania                           Vilnius               3886091     25200 Europe                          1991
Luxembourg                          Luxembourg             405980       100 Europe                          1945
Macedonia                           Skopje                2235917      9900 Europe                          1993
Madagascar                          Antananarivo         13560924    226700 Africa                          1960
Malawi                              Lilongwe              9828337     45700 Africa                          1964
Malaysia                            Kuala Lumpur         19473883    127600 Asia                            1957
Maldives                            Male                   254495       100 Asia                            1965
Mali                                Bamako                9203210    482100 Africa                          1960
Malta                               Valletta               370633       100 Europe                          1964
Marshall Islands                    Majuro                  54535       100 Oceania                         1991
Mauritania                          Nouakchott            2214709    398000 Africa                          1961
Mauritius                           Port Louis            1128057      1000 Africa                          1968
Mexico                              Mexico City          93114708    756100 North America                   1945
Micronesia                          Palikir                121188       300 Oceania                         1991
Moldova                             Chisinau              4517279     13000 Europe                          1992
Monaco                              Monaco                  31307       100 Europe                          1993
Mongolia                            Ulaan Baatar          2454055    604800 Asia                            1961
Montenegro                          Titograd               626137      5300 Europe                             .
Morocco                             Rabat                28841705    177100 Africa                          1956
Mozambique                          Maputo               17517708    313700 Africa                          1975
Myanmar                             Yangon               44715298    261200 Asia                            1948
Namibia                             Windhoek              1611798    318100 Africa                          1990
Nauru                               Yaren                   10099       100 Oceania                            .
Nepal                               Kathmandu            21250295     56800 Asia                            1955
Netherlands                         Amsterdam            15538306     16000 Europe                          1945
Netherlands Antilles                Willemstad             185822       400 Central America and Caribbean      .
New Zealand                         Wellington            3422548    104500 Oceania                         1945
Nicaragua                           Managua               4137556     50900 Central America and Caribbean   1945
Niger                               Niamey                8720477    497000 Africa                          1960
Nigeria                             Abuja                99062003    356700 Africa                          1960
Northern Ireland                    Belfast               1585541      5500 Europe                             .
Norway                              Oslo                  4357714    125100 Europe                          1945
Oman                                Muscat                1717838    118200 Asia                            1971
Pakistan                            Islamabad            1.2306E8    339700 Asia                            1947
Panama                              Panama City           2656034     29200 Central America and Caribbean   1945
Papua New Guinea                    Port Moresby          4238546    178700 Asia                            1975
Paraguay                            Asuncion              5265614    157000 South America                   1945
Peru                                Lima                 23885121    496200 South America                   1945
Philippines                         Manila               70500039    115900 Asia                            1945
Poland                              Warsaw               39037645    120700 Europe                          1945
Portugal                            Lisbon               10628177     35700 Europe                          1955
Puerto Rico                         San Juan              3556864      3492 Central America and Caribbean      .
Qatar                               Doha                   518078      4400 Asia                            1971
Romania                             Bucharest            23410469     91700 Europe                          1955
Russia                              Moscow               1.5109E8   6592800 Europe                          1945
Rwanda                              Kigali                8456895     10200 Africa                          1962
Saint Kitts and Nevis               Basseterre              41406       100 Central America and Caribbean   1983
Saint Lucia                         Castries               146436       200 Central America and Caribbean   1979
Saint Vincent and the Grenadines    Kingstown              116138       200 Central America and Caribbean   1980
San Marino                          San Marino              24238       100 Europe                          1992
Sao Tome and Principe               Sao Tome               138356       400 Africa                          1975
Saudi Arabia                        Riyadh               18377132    865000 Asia                            1945
Scotland                            Edinburgh             5006069     30400 Europe                             .
Senegal                             Dakar                 8817428     76000 Africa                          1960
Serbia                              Belgrade              9755624     34100 Europe                             .
Seychelles                          Victoria                72713       200 Africa                          1976
Sierra Leone                        Freetown              4675832     27200 Africa                          1961
Singapore                           Singapore             2887301       200 Asia                            1965
Slovakia                            Bratislava            5457495     18900 Europe                          1993
Slovenia                            Ljubljana             1991521      7800 Europe                          1992
Solomon Islands                     Honiara                389821     11000 Oceania                         1978
Somalia                             Mogadishu             6732996    246300 Africa                          1960
South Africa                        Cape Town            44365873    473300 Africa                          1945
Spain                               Madrid               39692061    194900 Europe                          1955
Sri Lanka                           Colombo              18211509     25300 Asia                            1955
Sudan                               Khartoum             29711229    966800 Africa                          1956
Suriname                            Paramaribo             427188     63300 South America                   1975
Swaziland                           Mbabane                945265      6700 Africa                          1968
Sweden                              Stockholm             8864893    173700 Europe                          1946
Switzerland                         Bern                  7109689     15900 Europe                             .
Syria                               Damascus             15034366     71500 Asia                            1945
Taiwan                              Taipei               21509839     14000 Asia                               .
Tajikistan                          Dushanbe              6054344     55300 Asia                            1992
Tanzania                            Dar-es-Salaam        28263033     36400 Africa                          1961
Thailand                            Bangkok              60099089    198100 Asia                            1946
Togo                                Lome                  4297120     21900 Africa                          1960
Tonga                               Nuku'alofa             106040       300 Oceania                            .
Trinidad and Tobago                 Port of Spain         1341146      2000 Central America and Caribbean   1962
Tunisia                             Tunis                 8813388     63400 Africa                          1956
Turkey                              Ankara               62769263    300948 Europe                          1945
Turkmenistan                        Ashgabat              4034546    188400 Asia                            1992
Turks and Caicos Islands            Grand Turk              12119       200 Central America and Caribbean      .
Tuvalu                              Funafuti                10099       100 Oceania                            .
Uganda                              Kampala              20055584     93100 Africa                          1962
Ukraine                             Kiev                 52360233    233100 Europe                          1945
United Arab Emirates                Abu Dhabi             2818628     30000 Asia                            1971
United States                       Washington           2.6329E8   3787318 North America                   1945
Uruguay                             Montevideo            3230667     68000 South America                   1945
Uzbekistan                          Tashkent             22832806    172700 Asia                            1992
Vanuatu                             Vila                   171683      4700 Oceania                         1981
Vatican City                        Vatican City             1010         2 Europe                             .
Venezuela                           Caracas              20765543    352100 South America                   1945
Vietnam                             Hanoi                73827657    127200 Asia                            1977
Wales                               Cardiff               2825697      8000 Europe                             .
Western Samoa                       Apia                   206020      1100 Oceania                         1976
Yemen                               Sanaa                11214929    205300 Asia                            1947
Yugoslavia                          Belgrade             10866513     39400 Europe                          1945
Zambia                              Lusaka                9278952    290600 Africa                          1964
Zimbabwe                            Harare               11083641    150900 Africa                          1980
;
run;
*';

/* LEFT OUTER JOIN example.
   Every row of COUNTRIES is kept; coordinates are attached from
   WORLDCITYCOORDS only where both the capital/city name and the
   country name match. */
proc sql;
   title 'Coordinates of Capital Cities';
   select a.Capital            format=$20.,
          a.Name label='Country' format=$20.,
          Latitude,
          Longitude
      from countries as a
           left join worldcitycoords as b
             on  a.Capital = b.City
             and a.Name    = b.Country
      order by Capital;
quit;

/* RIGHT OUTER JOIN example.
   Every row of WORLDCITYCOORDS is kept; Population is filled in from
   COUNTRIES only for rows whose city/country pair matches a
   capital/name pair. */
proc sql;
   title 'Populations of Capitals Only';
   select w.City                  format=$20.,
          w.Country label='Country' format=$20.,
          Population
      from countries as c
           right join worldcitycoords as w
             on  c.Capital = w.City
             and c.Name    = w.Country
      order by City;
quit;

/* FULL OUTER JOIN example.
   Rows from both COUNTRIES and WORLDCITYCOORDS are kept whether or not
   they find a partner; the column labels name the table each city
   column comes from. */
proc sql;
   title 'Populations/Coordinates of World Cities';
   select w.City    label='#City#(WORLDCITYCOORDS)' format=$20.,
          c.Capital label='#Capital#(COUNTRIES)'    format=$20.,
          Population,
          Latitude,
          Longitude
      from countries as c
           full join worldcitycoords as w
             on  c.Capital = w.City
             and c.Name    = w.Country;
quit;

/* Clear the title so it does not carry over to later output. */
title;








**************************************************;
**************************************************;
**************************************************;
**************************************************;
**************************************************;
**************************************************;
**************************************************;
**************************************************;
**************************************************;
**************************************************;

/*************************************************
**                                              **
**                  第三章                       **
**                                              **
*************************************************/
    
/* List WORK.C9501 in BY groups of SEX.
   BY-group processing requires the input to be ordered by the BY
   variable; otherwise PROC PRINT stops with a "not sorted" error.
   The rows are entered in mixed order, so sort first (the same
   sort-then-print pattern is used later in this chapter). */
proc sort data=c9501;
  by sex;
run;
proc print data=c9501;
  by sex;
run;



/* Summarise MATH and also save the statistics to WORK.RESULT:
   N -> n, MEAN -> meanmath, VAR -> varmath. */
proc means data=samp.c9501;
  var math;
  output out=result
         n=n
         mean=meanmath
         var=varmath;
run;

/* Show the saved statistics. */
proc print data=result;
run;


/******************************************/
/*          PROC PRINT examples           */
/******************************************/

/* LABEL option: column headings use the variable labels instead of
   the variable names. NAME is the identifying column. */
proc print data=samp.c9501 label;
  label name='姓名'
        math='数学 成绩'
        chinese='语文 成绩';
  var math chinese;
  id name;
run;

/* SPLIT= option: the "*" inside each label marks where the column
   heading is broken onto a new line. */
proc print data=samp.c9501 split="*";
  label name='姓名'
        math='数学*成绩'
        chinese='语文*成绩';
  var math chinese;
  id name;
run;



/* Print both scores with width 5 and one decimal place. */
proc print data=samp.c9501;
  format math chinese 5.1;
run;

/* Plain listing of the GPA data set. */
proc print data=samp.gpa;
run;

    
/* VAR: choose which columns are printed and in what order. */
proc print data=samp.c9501;
  var name chinese sex;
run;

/* NOOBS: suppress the observation-number column. */
proc print data=samp.c9501 noobs;
run;

/* WHERE: print only the rows for the two named students. */
proc print data=samp.c9501;
  where name in ('李明', '张聪');
run;


/* Make a WORK copy of the class data ordered by SEX, then list it
   one BY group at a time. */
proc sort data=samp.c9501 out=c9501;
  by sex;
run;

proc print data=c9501;
  by sex;
run;


/* Default summary statistics for the two score variables. */
proc means data=c9501;
  var math
      chinese;
run;



/* Full listing without observation numbers, using labels as the
   column headings. */
proc print data=c9501 noobs label;
  label name='姓名'
        sex='性别'
        math='数学'
        chinese='语文'
        avg='平均分';
  var name sex math chinese avg;
run;

/* ODS HTML: send the output of the two procedures below to
   testhtml.htm. */
ods html body="testhtml.htm";

proc print data=samp.class noobs;
  title '身高和体重数据';
run;

proc means data=samp.class;
  title '身高和体重基本统计';
  class sex;
  var height weight;
run;

title;            /* clear the title */
ods html close;   /* finish writing testhtml.htm */
ods html;         /* open the HTML destination again with default settings */


/* ODS CSVALL: send the output of the two procedures below to
   testcsv.csv. */
ods csvall body="testcsv.csv";

proc print data=samp.class noobs;
  title '身高和体重数据';
run;

proc means data=samp.class;
  title '身高和体重基本统计';
  class sex;
  var height weight;
run;

title;              /* clear the title */
ods csvall close;   /* finish writing testcsv.csv */

/* ODS RTF: send the output of the two procedures below to
   testods.rtf using the destination's default settings. */
ods rtf file="testods.rtf";

proc print data=samp.class noobs;
  title '身高和体重数据';
run;

proc means data=samp.class;
  title '身高和体重基本统计';
  class sex;
  var height weight;
run;

ods rtf close;


/* ODS RTF again, this time with layout options on the opening
   statement (CONTENTS, TOC_DATA, BODYTITLE, STARTPAGE=NO, KEEPN),
   the Minimal style, and procedure titles switched off.
   Note: ODS NOPROCTITLE is not switched back on in this block. */
ods rtf file="testods.rtf"
        contents
        toc_data
        bodytitle
        startpage=no
        keepn;
ods rtf style=Minimal;
ods noproctitle;

proc print data=samp.class noobs;
  title '身高和体重数据';
run;

proc means data=samp.class;
  title '身高和体重基本统计';
  class sex;
  var height weight;
run;

ods rtf close;

/* ODS LATEX: write the PROC MEANS output to sasout.tex, referring to
   the style sheet sas.sty. */
ods latex file='sasout.tex'
          stylesheet='sas.sty'(url='sas');

title 'Mean Height and Weight';
proc means data=samp.class;
  var height weight;
run;

ods latex close;

/* ODS PDF: write an untitled, labelled listing of the class data to
   sasout.pdf. */
ods pdf file='sasout.pdf';

title;   /* no title on this listing */
proc print data=samp.class label noobs;
run;

ods pdf close;

/* Save procedure output tables as data sets with ODS.
   Step 1: run the procedure with ODS TRACE ON so that the names of
           the output objects it produces are reported. */
ods trace on;
proc univariate data=samp.class;
  var height;
run;
ods trace off;

/* Step 2: run it again, capturing the Moments table into WORK.MOM
           and the Quantiles table into WORK.QU. */
ods output Moments=mom
           Quantiles=qu;
proc univariate data=samp.class;
  var height;
run;
ods output close;


/* Listing with explicit numeric formats: scores with one decimal,
   average with two. */
proc print data=c9501 noobs;
  format math chinese 7.1
         avg          7.2;
  var name sex math chinese avg;
run;

/* The title set for the first step stays in effect for the second
   step as well and is cleared at the end. */
proc print data=c9501 noobs label;
  title '95级1班成绩表';
  label name='姓名'
        sex='性别'
        math='数学'
        chinese='语文'
        avg='平均分';
  var name sex math chinese avg;
run;

proc means data=samp.gpa;
run;

title;

/* Footnote for the chapter's output, followed by page settings:
   no page numbers, no date, 64 columns by 60 lines. */
footnote '第三章例子输出';

options nonumber nodate;
options linesize=64 pagesize=60;

/* Reorder WORK.C9501 by SEX in place, then list it by BY group. */
proc sort data=c9501;
  by sex;
run;

proc print data=c9501;
  by sex;
run;


/* BKMONEY: an amount recorded for each student (name, amount).
   NAME is declared as $10 explicitly. With plain list input it would
   get the default length of 8, which differs from the $10 NAME in
   C9501; merging the two BY NAME would then raise the "multiple
   lengths were specified for the BY variable" warning and could
   truncate longer names. */
data bkmoney;
  length name $ 10;
  input name $ amount;
  cards;
李明  20
张红艺 15
王思明 10
张聪 20
刘颍 50
;
run;


/* List BKMONEY without observation numbers and add a total line for
   AMOUNT. */
proc print noobs data=bkmoney;
  sum amount;
run;


/* Match-merge the class data with BKMONEY on NAME.
   Both inputs are sorted by NAME first, as the merge requires. */
proc sort data=c9501;
  by name;
run;

proc sort data=bkmoney;
  by name;
run;

data c9501bk;
  merge c9501
        bkmoney;
  by name;
run;

/* Re-order the merged table by SEX and print it in BY groups with
   AMOUNT totals. */
proc sort data=c9501bk;
  by sex;
run;

proc print data=c9501bk;
  sum amount;
  by sex;
run;

/* Naming the same variable on both BY and ID gives a compact
   grouped listing. */
proc print data=c9501bk;
  id sex;
  by sex;
run;

/* Compact grouped display with PROC REPORT, step 1:
   copy SAMP.CLASS and add SEXC, the sex code translated into a
   Chinese word ('F' -> '女', 'M' -> '男').
   The OTHERWISE branch is required for robustness: a SELECT group
   with no OTHERWISE stops the DATA step with an error as soon as a
   row matches none of the WHEN values. Unexpected codes are written
   to the log and SEXC is left blank for that row. */
data new;
  set samp.class;
  select(sex);
    when('F') sexc='女';
    when('M') sexc='男';
    otherwise put sex= '有错';
  end;
run;
/* Report on WORK.NEW with SEXC and AGE as ORDER variables, under the
   headings given in the DEFINE statements. */
proc report data=new nowd;
  column sexc age name height weight;
  define age  / order '年龄';
  define sexc / order '性别';
run;

/* Same idea without creating a new variable: a character format maps
   the sex codes to Chinese words when the report is displayed. */
proc format;
  value $sfmt 'F' = '女'
              'M' = '男';
run;

/* NOTE(review): this step reads WORK.CLASS, while the neighbouring
   examples read SAMP.CLASS - confirm that WORK.CLASS exists and is
   the intended input. */
proc report data=class nowd;
  format sex $sfmt.;
  column sex age name height weight;
  define age / group '年龄';
  define sex / group '性别';
run;

/* PROC PRINT example from the online documentation:
   double-spaced listing of the Southern-region rows of EXPREV whose
   revenues are at least 8000. */
proc print data=exprev double;
   var month state revenues;
   where region='Southern' and revenues>=8000;
   title1 'High Monthly Revenues for';
   title2 'the Southern Region';
run;
/* Clear both title lines so they do not appear on the unrelated
   output produced by the steps that follow. */
title;

/* ODS LaTeX example: write a listing of SAMP.CLASS to simple.tex,
   referring to the style sheet sas.sty. */
ods latex file="simple.tex"
          stylesheet="sas.sty"(url="sas");

proc print data=samp.class;
run;

ods latex close;


/* Page settings for this report: no date, page numbering restarted
   at 1, 64 columns by 60 lines. */
options nodate pageno=1 linesize=64 pagesize=60;

/* Copy PILOTS into TEMPEMP ordered by job code and gender. */
proc sort data=pilots out=tempemp;
   by jobcode gender;
run;

/* Salary listing for job codes PT1 and PT2, grouped by job code with
   salary totals. "*" in a label starts a new heading line, so each
   heading is underlined with "=" characters. */
proc print data=tempemp split='*';
   title 'Expenses Incurred for';
   title2 'Salaries for Pilots';
   where jobcode in ('PT1','PT2');
   by jobcode;
   id jobcode;
   var gender salary;
   sum salary;
   format salary dollar11.2;
   label jobcode='Job Code*========'
         gender='Gender*======'
         salary='Annual Salary*=============';
run;

title;




/******************************************/
/*         PROC TABULATE examples         */
/******************************************/

/* Basic two-dimensional table: one row per SEX value, one column for
   the analysis variable AMOUNT. */
proc tabulate data=samp.c9501bk;
  var amount;
  class sex;
  table sex,
        amount;
run;

/* One-dimensional table built from the class variable only. */
proc tabulate data=samp.c9501bk;
  class sex;
  table sex;
run;


/* Statistics nested inside variables: for each of MATH and CHINESE,
   a MEAN column and a STD column. */
proc tabulate data=samp.c9501bk;
  var math chinese;
  class sex;
  table sex,
        (math chinese)*(mean std);
run;

/* Variables nested inside statistics: for each of MEAN and STD,
   a MATH column and a CHINESE column. */
proc tabulate data=samp.c9501bk;
  var math chinese;
  class sex;
  table sex,
        (mean std)*(math chinese);
run;


/* ALL in the row dimension adds a summary row after the SEX rows. */
proc tabulate data=samp.c9501bk;
  var math chinese;
  class sex;
  table sex all,
        (math chinese)*(mean std);
run;

/* Two class variables crossed in the row dimension: one row per
   DRUG*DISEASE combination, MEAN and STD of CHANG_BP as columns. */
proc tabulate data=samp.drug;
  var chang_bp;
  class drug disease;
  table drug*disease,
        chang_bp*(mean std);
run;

/* Same statistics with DISEASE moved to the column dimension:
   one row per DRUG, column groups per DISEASE. */
proc tabulate data=samp.drug;
  var chang_bp;
  class drug disease;
  table drug,
        disease*chang_bp*(mean std);
run;

/* One-dimensional table: frequency (N) and percentage (PCTN) for
   each SEX value and for ALL. */
proc tabulate data=samp.c9501bk;
  class sex;
  table (sex all)*(N PCTN);
run;

/* One-dimensional table: MEAN and STD of MATH for each SEX value
   and for ALL. */
proc tabulate data=samp.c9501bk;
  var math chinese;
  class sex;
  table (sex all)*math*(mean std);
run;

/* Labels assigned inline in the TABLE statement with the element='label'
   form. */
PROC TABULATE data=samp.c9501bk;
  VAR math chinese;
  CLASS sex;
  TABLE (sex='性别' 'all'='总计'),
        (math='数学' chinese='语文')
          * ('mean'='平均值' 'std'='标准差');
RUN;

/* Same table, with the labels supplied through LABEL (variables) and
   KEYLABEL (statistic keywords and ALL) statements instead. */
PROC TABULATE data=samp.c9501bk;
  VAR math chinese;
  CLASS sex;
  LABEL sex='性别'
        math='数学'
        chinese='语文';
  KEYLABEL mean='平均值'
           std='标准差'
           all='总计';
  TABLE (sex all), (math chinese)*(mean std);
RUN;

/* Inline labels plus a per-statistic cell format (F=): 6.1 for the mean,
   7.2 for the standard deviation. */
PROC TABULATE data=samp.c9501bk;
  VAR math chinese;
  CLASS sex;
  TABLE sex='性别' 'all'='总计',
        (math='数学' chinese='语文')
          * ('mean'='平均值'*F=6.1 'std'='标准差'*F=7.2);
RUN;

/* Save the tabulated statistics (N, SUM, PCTSUM of AMOUNT by SEX) in the
   output data set SUMMD. */
PROC TABULATE data=samp.c9501bk out=summd;
  VAR amount;
  CLASS sex;
  TABLE sex,
        amount*(N SUM PCTSUM);
RUN;


/* STYLE= options, for use together with ODS output: a style override on the
   VAR statement and another attached to the ALL row of the TABLE statement. */
PROC TABULATE data=samp.c9501bk;
  CLASS sex;
  VAR math chinese / style=[font_size=200%];
  TABLE sex=' '
        'all'='总计'*[style=[font_weight=bold background=white]]
        ,
        (math='数学' chinese='语文')
          * ('mean'='平均值'*F=6.1 'std'='标准差'*F=7.2);
RUN;



/* PROC TABULATE example from the online documentation: list WGHTCLUB, then
   tabulate the mean start weight, end weight and loss per team in two
   layouts (teams as rows, then teams as columns). */
proc print data=wghtclub; 
   title 'Health Club Data'; 
run;
proc tabulate data=wghtclub; 
   class team; 
   var strtwght endwght loss; 
   table team, mean*(strtwght endwght loss);
   table mean*(strtwght endwght loss), team; 
   /* Keep the title literal on one source line: a quoted string that spans
      lines pulls the line break and indentation into the title text. */
   title 'Mean Starting Weight, Ending Weight, and Weight Loss'; 
run;
quit;


/* Send the output to an HTML file. */
ods html file='table.htm' style=default;

/* PROC TABULATE with STYLE= specifications on the PROC, CLASSLEV, VAR,
   KEYWORD and TABLE statements (cell, MISSTEXT= and BOX= styles).
   The former "length Group $ 8;" line was removed: LENGTH is a DATA step
   statement, it is not accepted inside PROC TABULATE, and no GROUP variable
   is used by this step. */
proc tabulate data=patients 
    style=[font_weight=bold];
    class actlevel;
    classlev actlevel / style=[just=left];
    var age height weight / style=[font_size=3];
    keyword all sum / style=[font_width=wide];
    keylabel all="All Patients";
    table (actlevel="Activity Level" 
         all*[style=[background=yellow]]),
        (age height weight*f=best10.2)*mean /
        style=[background=white]
        misstext=[label="Missing"  style=[font_weight=light]]
        box=[label="Patient Info by Activity Level"
        style=[font_style=italic]];
    title 'Enhanced Table';
run;
ods html close;
title;



/******************************************/
/*           PROC SORT examples           */
/******************************************/
/* Sorting example: sort SAMP.C9501 in place by SEX. */
PROC SORT data=samp.c9501;
  BY sex;
RUN;

/* Sort with duplicate keys removed: C9501SEX keeps one row per distinct SEX
   value and only the SEX variable. */
PROC SORT data=samp.c9501 nodupkey
          out=c9501sex(keep=sex);
  BY sex;
RUN;
PROC PRINT data=c9501sex; RUN;

/* Sort in place by SEX, and within each sex by AVG from high to low. */
PROC SORT data=samp.c9501;
  BY sex DESCENDING avg;
RUN;

/* Sorting combined with a DATA step: after sorting by SEX and descending
   HEIGHT, the first row of each SEX group is the tallest student. */
PROC SORT data=samp.class out=cl2;
  BY sex DESCENDING height;
RUN;
DATA new;
  SET cl2;
  BY sex;
  IF first.sex;   /* subsetting IF: keep only the first row of each group */
RUN;
PROC PRINT data=new; RUN;
/* The same problem solved with SQL in several statements (the single-query
   form follows below):
     1. show the maximum height per sex;
     2. store that result in the temporary table TMPD;
     3. join SAMP.CLASS to TMPD to find the students with those heights;
     4. drop TMPD.
   PROC SQL executes each statement as soon as it is read, so the RUN
   statements that used to sit between the queries did nothing except write
   a note to the log; they have been removed. */
proc sql;
  select sex, max(height) AS height
      from samp.class
      group by sex;

  create table tmpd AS
    select sex, max(height) AS height
      from samp.class
      group by sex;

  select a.name, a.sex, a.height
    from samp.class AS a, tmpd AS b
    where a.sex=b.sex 
       and a.height=b.height
  ;

  drop table tmpd;
quit;
/* The program above reduced to one query: the per-sex maximum height is
   computed in an inline view (B) and joined back to SAMP.CLASS. */
PROC SQL;
  SELECT a.name, a.sex, a.height
    FROM samp.class AS a,
         ( SELECT sex, max(height) AS height
             FROM samp.class
             GROUP BY sex ) AS b
    WHERE a.sex = b.sex
      AND a.height = b.height;
QUIT;


/******************************************/
/*         PROC TRANSPOSE examples        */
/******************************************/

/* Simple matrix transpose: the 4x3 data set MAT becomes MATT, with the
   generated columns COL1-COL4 renamed to I1-I4. */
DATA mat;
  INPUT x1 x2 x3;
  DATALINES;
1   2  3
4   5  6
7   8  9
10 11 12
;
RUN;
PROC TRANSPOSE data=mat
               out=matt(rename=(col1=I1 col2=I2 col3=I3 col4=I4));
  VAR x1 x2 x3;
RUN;
PROC PRINT data=matt; RUN;



/* Combining rows (long table to wide table) with PROC TRANSPOSE.
   ONECOL is the long input: one row per (NUM, TEST) pair with value VAL. */
DATA onecol;
  INPUT num test $ val;
  DATALINES;
1 a 11
2 b 22
3 a 13
1 b 21
2 a 12
3 b 23
;
RUN;
TITLE '合并行: PROC TRANSPOSE';
PROC SORT data=onecol;
  BY num;
RUN;
PROC PRINT data=onecol; RUN;

/* Wrong result (kept on purpose as a counter-example): ONECOL is sorted by
   NUM only and there is no ID statement, so the order of the a/b values
   inside each NUM group decides which output column they land in. */
PROC TRANSPOSE data=onecol out=twotest;
  BY num;
  VAR val;
RUN;
PROC PRINT data=twotest; RUN;

/* Correct result, but not safe: sorting by NUM and TEST puts the values in
   a consistent order inside each group ... */
PROC SORT data=onecol;
  BY num test;
RUN;
PROC TRANSPOSE data=onecol out=twotest;
  BY num;
  VAR val;
RUN;
PROC PRINT data=twotest; RUN;

/* ... but ONECOLB shows why that is fragile: the row (num=2, test=a) is
   absent, so for NUM=2 the 'b' value is the first value of its group. */
DATA onecolb;
  INPUT num test $ val;
  DATALINES;
1 a 11
2 b 22
3 a 13
1 b 21
3 b 23
;
RUN;
PROC SORT data=onecolb;
  BY num test;
RUN;
PROC TRANSPOSE data=onecolb out=twotest;
  BY num;
  VAR val;
RUN;
PROC PRINT data=twotest; RUN;

/* Correct solution: the ID statement names the output columns after the
   values of TEST, so each value goes to the column of its own test. */
PROC TRANSPOSE data=onecol out=twotest;
  BY num;
  ID test;
  VAR val;
RUN;
PROC PRINT data=twotest; RUN;

PROC TRANSPOSE data=onecolb out=twotestb;
  BY num;
  ID test;
  VAR val;
RUN;
PROC PRINT data=twotestb; RUN;


/* Long table to wide table with DATA steps: split ONECOL into one data set
   per test, each keeping NUM and the value under the test's own name. */
TITLE '合并行: 数据集拆分与横向合并';
DATA a;
  SET onecol;
  WHERE test='a';
  KEEP num a;
  a = val;
RUN;
DATA b;
  SET onecol;
  WHERE test='b';
  KEEP num b;
  b = val;
RUN;

/* Alternative: create both data sets in one DATA step, filtering and
   renaming with data set options on the outputs.
   Note that TEST is not dropped here, so A and B still contain it. */
DATA a( where=(test='a') rename=(val=a) )
     b( where=(test='b') rename=(val=b) );
  SET onecol;
RUN;
PROC PRINT data=a; RUN;
PROC PRINT data=b; RUN;


/* Match-merge the two halves side by side on NUM (both sorted first). */
PROC SORT data=a;
  BY num;
RUN;
PROC SORT data=b;
  BY num;
RUN;
DATA new;
  MERGE a b;
  BY num;
RUN;
PROC PRINT data=new; RUN;

/* Long table to wide table with PROC SQL: self-join on NUM, taking the 'a'
   row from one copy and the 'b' row from the other.
   NOTE(review): this reads SAMP.ONECOL, while the examples above build and
   use WORK.ONECOL - confirm that SAMP.ONECOL exists. */
TITLE '合并行: PROC SQL';
PROC SQL;
  SELECT a.num AS num,
         a.val AS a,
         b.val AS b
    FROM samp.onecol a, samp.onecol b
    WHERE a.num = b.num
      AND a.test = 'a'
      AND b.test = 'b';
QUIT;


/* Split one row into several rows (wide table to long table) with
   PROC TRANSPOSE.  SAMP.TWOCOL is sorted in place, so the transpose must
   read SAMP.TWOCOL as well: the previous data=twocol pointed at
   WORK.TWOCOL, which does not exist at this point in the script. */
title '拆分行: PROC TRANSPOSE';
proc sort data=samp.twocol;
  by num;
run;
proc transpose data=samp.twocol out=onetest;
  var test1 test2;
  by num;
run;
proc print;run;
/**************************************************/
/* Same split, this time sorting into a WORK copy of TWOCOL. */
TITLE '拆分行: PROC TRANSPOSE';
PROC SORT data=samp.twocol out=twocol;
  BY num;
RUN;
/* A data set option on the output names the transposed value column:
   COL1 is renamed to VAL. */
PROC TRANSPOSE data=twocol
               out=onetest(rename=(col1=val));
  BY num;
  VAR test1 test2;
RUN;
PROC PRINT data=onetest; RUN;

/* Turn the test names test1/test2 into the numbers 1/2 with string
   functions: the 5th character of _NAME_ is read as a number.
   The LENGTH statement placed before SET fixes the left-to-right order of
   the variables in the output data set. */
DATA new;
  LENGTH num testid val 8;
  SET onetest;
  testid = input(substr(_name_, 5, 1), 1.);
  DROP _name_;
RUN;
PROC PRINT data=new; RUN;

/* Wide table to long table with a DATA step: each input row is written out
   twice, once for TEST1 (as test 'a') and once for TEST2 (as test 'b'). */
TITLE '拆分行: 数据步重复OUTPUT';
DATA new1;
  /* Declared before SET so the output columns are ordered num, test, val. */
  LENGTH num 8 test $1 val 8;
  SET samp.twocol;
  DROP test1 test2;

  val = test1;  test = 'a';  OUTPUT;
  val = test2;  test = 'b';  OUTPUT;
RUN;
PROC PRINT data=new1; RUN;

/* Wide table to long table with PROC SQL: the UNION of one query per
   test column. */
TITLE '拆分行: PROC SQL';
PROC SQL;
  SELECT num, 'a' AS test, test1 AS val
    FROM samp.twocol
  UNION
  SELECT num, 'b' AS test, test2 AS val
    FROM samp.twocol;
QUIT;

/* Split a row with arrays and a loop.
   FIRSTOBS=2 skips the first data line (the rat-number header).
   The slash in the INPUT statement moves on to the next data line, so one
   pass of the step reads the dose line and the remission line together.
   The loop then writes one observation per rat: I, DOSE and REMISS. */
data rats;
  infile cards firstobs=2;
  attrib rem1-rem20 format= $1.;
  input w1 $ 1-12 dose1-dose20 / w2 $ 1-12  rem1-rem20 $;
  array x(20) dose1-dose20;
  array y(20) $ rem1-rem20;
  do i = 1 to 20;
    dose = x(i);
    remiss = y(i);
    output;
  end;
  keep i dose remiss;
  cards;
rat number   1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 
trtmt dose  25 30 35 40 45 50 55 60 65 70 25 30 35 40 45 50 55 60 65 70
remission    F  F  F  F  T  T  F  T  T  T  F  F  F  F  F  T  T  T  T  T
;
proc print data = rats;
run;
/* End of the row-splitting examples: clear the title that is still in
   effect so it does not appear on the PROC MEANS output that follows. */
title;



/******************************************/
/*          PROC MEANS examples           */
/******************************************/

/* PROC MEANS with its default statistics for MATH and CHINESE. */
PROC MEANS data=c9501;
  VAR math chinese;
RUN;

/* Requesting specific statistics: SUM and CV of HEIGHT. */
PROC MEANS data=samp.class sum cv;
  VAR height;
RUN;


/* MEANS example: confidence limits for the mean (CLM) of HEIGHT with
   ALPHA=0.10. */
PROC MEANS data=samp.class
           alpha=0.10
           mean std clm;
  VAR height;
RUN;

/* OUTPUT statement, variant 1: means stored under the explicit names
   MH and MW in MC1. */
PROC MEANS data=samp.class;
  VAR height weight;
  OUTPUT out=mc1
         mean=mh mw;
RUN;

/* Variant 2: AUTONAME lets the procedure name the mean and std columns. */
PROC MEANS data=samp.class;
  VAR height weight;
  OUTPUT out=mc2
         mean= std= / autoname;
RUN;

/* Variant 3: OUTPUT without a statistic list; NOPRINT suppresses the
   printed report, so MC3 is listed with PROC PRINT instead. */
PROC MEANS data=samp.class NOPRINT MEAN MIN MAX;
  VAR height weight;
  OUTPUT out=mc3;
RUN;
PROC PRINT data=mc3; RUN;

/* MEANS example: height minus the class mean height.
   The DATA step uses two SET statements: the second one runs only on the
   first iteration and reads the mean MH from MC1. */
PROC MEANS data=samp.class;
  VAR height;
  OUTPUT out=mc1 mean=mh;
RUN;

DATA cc;
  SET samp.class;
  IF _n_ = 1 THEN SET mc1(keep=mh);
  height2 = height - mh;
RUN;
PROC PRINT data=cc; RUN;

/* Delete the temporary data set CC again. */
PROC DATASETS nolist;
  DELETE cc;
QUIT;

/* MEANS example: grouping with a CLASS statement; the mean height is saved
   in MCS and listed. */
PROC MEANS data=samp.class;
  VAR height;
  CLASS sex;
  OUTPUT out=mcs mean=mh;
RUN;
PROC PRINT data=mcs; RUN;
/* MEANS example: for boys and girls separately, HEIGHT2 is the height minus
   the mean height of the student's own group.  The group means are
   match-merged back onto the sorted class data by SEX. */
PROC MEANS data=samp.class nway;
  VAR height;
  CLASS sex;
  OUTPUT out=mcs mean=mh;
RUN;

PROC SORT data=samp.class;
  BY sex;
RUN;
PROC SORT data=mcs;
  BY sex;
RUN;

DATA new;
  MERGE samp.class
        mcs(drop=_type_ _freq_);
  BY sex;
  height2 = height - mh;
RUN;
PROC PRINT data=new; RUN;

/* MEANS example: two CLASS variables (STATUS and YEAR); MAXDEC=3 limits the
   printed statistics to three decimals. */
TITLE '按学生状态和毕业年分类的分数统计';
PROC MEANS data=samp.grade maxdec=3;
  CLASS Status Year;
  VAR Score;
RUN;
TITLE;

/* MEANS example: grouped summary written to a data set; MGSY receives the
   mean (MS) and standard deviation (SS) of SCORE, and nothing is printed by
   PROC MEANS itself. */
PROC MEANS data=samp.grade NOPRINT;
  CLASS Status Year;
  VAR Score;
  OUTPUT out=mgsy
         mean=ms
         std=ss;
RUN;
PROC PRINT data=mgsy; RUN;

/* MEANS example: name the statistics to print (N, MEAN, MAX, MIN, RANGE,
   STD); FW=8 sets the field width of the printed statistics. */
proc means data=samp.cake n mean max min range std fw=8;
     var PresentScore TasteScore;
     title 'Summary of Presentation and Taste Scores';
run;
/* Clear the title, as the other titled MEANS example does, so it does not
   show up on the UNIVARIATE and FREQ output that follows. */
title;



/******************************************/
/*       PROC UNIVARIATE examples         */
/******************************************/

/* PROC UNIVARIATE: descriptive statistics for GPA. */
PROC UNIVARIATE data=samp.gpa;
  VAR gpa;
RUN;

/* The same analysis with the PLOT option added. */
PROC UNIVARIATE data=samp.gpa PLOT;
  VAR gpa;
RUN;


/******************************************/
/*           PROC FREQ examples           */
/******************************************/

/* PROC FREQ: one-way frequency table of SEX. */
PROC FREQ data=c9501;
  TABLES sex;
RUN;

/* PROC FREQ: the same table saved to the data set SEXT and listed. */
PROC FREQ data=c9501;
  TABLES sex / out=sext;
RUN;
PROC PRINT data=sext; RUN;

/* Frequency-weighted summary: each (AMOUNT, NUM) pair says that the value
   AMOUNT occurred NUM times.  The trailing @@ lets one data line supply
   several observations. */
data samp.taxif;
  input amount num @@;
  cards;
10 4 12 6 13 1 15 1
16 1 19 5 20 3 23 1
24 1 25 1 26 3 27 1
32 1 47 1 48 2 49 1
52 1 55 1 58 1 81 1
;
run;
/* Read the data set from the library it was just written to: the previous
   data=taxif referred to WORK.TAXIF, which this script never creates.
   The FREQ statement makes NUM the frequency of each AMOUNT value. */
proc means data=samp.taxif n mean sum;
  var amount;
  freq num;
run;


/* PROC FREQ: eye and hair color, simple frequency tables.
   WEIGHT COUNT makes each row count COUNT times. */
TITLE1 'Simple Frequency Tables';
PROC FREQ data=samp.color;
   WEIGHT count;
   TABLES eyes hair;
RUN;

/* PROC FREQ: eye-by-hair contingency table, also saved as the data set
   FREQCNT (OUTEXPECT and SPARSE requested for the output data set). */
TITLE1 'Crosstabulation Table';
PROC FREQ data=samp.color;
   WEIGHT count;
   TABLES eyes*hair / out=freqcnt outexpect sparse;
RUN;
TITLE2 'Output Data Set from PROC FREQ';
PROC PRINT data=freqcnt noobs;
RUN;
TITLE;
    
/* Example of producing a data set of contingency-table counts with
   PROC FREQ.  TEST gets, for every (STARTID, SASTIME) cell, BID rows, where
   BID is drawn with RANTBL (seed 1133, probabilities 0.5/0.3/0.2). */
DATA test;
  DO startid = 1 TO 3;
    DO sastime = 1 TO 4;
      bid = rantbl(1133, 0.5, 0.3, 0.2);
      DO pid = 1 TO bid;
        OUTPUT;
      END;
    END;
  END;
  DROP bid;
RUN;
PROC PRINT data=test; RUN;

PROC FREQ data=test;
  TABLES startid*sastime / out=counts;
RUN;
PROC PRINT data=counts; RUN;



/******************************************/
/*           PROC CORR examples           */
/******************************************/
/* PROC CORR: correlations among HSM, HSS and HSE. */
PROC CORR data=samp.gpa;
  VAR hsm hss hse;
RUN;

/* VAR variables correlated against the WITH variables SATM and SATV. */
PROC CORR data=samp.gpa;
  WITH satm satv;
  VAR hsm hss hse;
RUN;

/* Spearman correlations of GPA with SATM and SATV. */
PROC CORR data=samp.gpa SPEARMAN;
  WITH gpa;
  VAR satm satv;
RUN;



/******************************************/
/*          PROC GPLOT examples           */
/******************************************/

/* Plot: scatter plot (no interpolation, star markers). */
PROC GPLOT data=samp.gpa;
  SYMBOL v=star i=none;
  PLOT satv*satm;
RUN;


/* Plot: line plot (points joined, star markers). */
PROC GPLOT data=samp.air;
  SYMBOL v=star i=join;
  PLOT co*datetime;
RUN;

/* Plot: points joined by lines, with explicit colors.
   GOPTIONS is a global statement that sets graphics options;
   RESET=GLOBAL resets the settings of global graphics statements such as
   SYMBOL, PATTERN, AXIS and LEGEND;
   RESET=ALL additionally resets the settings made by GOPTIONS itself.
   PLOT statement options:
   HAXIS=  tick values of the horizontal axis,
   VAXIS=  tick values of the vertical axis,
   HMINOR= number of minor ticks on the horizontal axis (number of
           subdivisions minus one),
   VMINOR= number of minor ticks on the vertical axis,
   VREF=   positions of the reference lines on the vertical axis,
   LVREF=  line type of those reference lines,
   CVREF=  color of those reference lines,
   CAXIS=  axis color,
   CTEXT=  text color.
   HEIGHT= in the SYMBOL statement sets the size of the plot symbol.
 */
GOPTIONS reset=global gunit=pct border cback=white
         colors=(black blue green red)
         ftext=swiss ftitle=swissb htitle=6 htext=4;

TITLE1 'Dow Jones Yearly Highs';
FOOTNOTE h=3 j=l ' Source: 1997 World Almanac' j=r 'GPLDTPT1 ';
SYMBOL1 color=red interpol=join value=dot height=1;

PROC GPLOT data=samp.stocks;
   PLOT dowjoneshigh*year /
        haxis=1955 to 1995 by 5   hminor=4
        vaxis=0 to 6000 by 1000   vminor=1
        vref=1000 3000 5000  lvref=2  cvref=blue
        caxis=blue
        ctext=red;
RUN;
QUIT;
TITLE; FOOTNOTE;

/* Plot: several curves.  CO uses SYMBOL1 and SO2 uses SYMBOL2, and OVERLAY
   draws both on one set of axes. */
PROC GPLOT data=samp.air;
  SYMBOL1 i=join v=none line=1 color=black;
  SYMBOL2 i=join v=none line=2 color=blue;
  PLOT co*datetime=1
       so2*datetime=2 / overlay;
RUN;

/* GPLOT example: stock data, several curves in one plot, with AXIS and
   LEGEND definitions. */
GOPTIONS reset=global gunit=pct border cback=white
         colors=(black blue green red)
         ftext=swiss ftitle=swissb htitle=6 htext=4;

TITLE1 'Dow Jones Yearly Highs and Lows';
FOOTNOTE1 h=3 j=l ' Source: 1997 World Almanac' j=r 'GPLOVRL1 ';

SYMBOL1 color=red interpol=join value=dot height=3;
SYMBOL2 font=marker value=C color=blue interpol=join height=2;

/* Horizontal axis: years. */
AXIS1 order=(1955 to 1995 by 5) offset=(2,2) label=none
      major=(height=2) minor=(height=1) width=3;

/* Vertical axis: index value. */
AXIS2 order=(0 to 6000 by 1000) offset=(0,0) label=none
      major=(height=2) minor=(height=1) width=3;

LEGEND1 label=none shape=symbol(4,2)
        position=(top center inside) mode=share;

PROC GPLOT data=samp.stocks;
   PLOT DowJonesHigh*year DowJonesLow*year /
        overlay legend=legend1
        vref=1000 to 5000 by 1000 lvref=2
        haxis=axis1 hminor=4
        vaxis=axis2 vminor=1;
RUN;
QUIT;
TITLE; FOOTNOTE;

/* GPLOT example: average monthly temperature, one curve per CITY
   (plot request of the form y*x=classvar).  Two RUN groups: a plain plot
   first, then a refined one with splines, custom axes and a legend. */
GOPTIONS reset=global gunit=pct border cback=white
         colors=(black red blue green)
         ftext=swiss ftitle=swissb htitle=6 htext=3;

TITLE1 'Average Monthly Temperature';
FOOTNOTE1 j=l ' Source: 1984 American Express';
FOOTNOTE2 j=l '         Appointment Book' j=r 'GPLVRBL2(a) ';

SYMBOL1 interpol=join value=dot height=3;

PROC GPLOT data=samp.citytemp;
  PLOT faren*month=city / hminor=0;
RUN;

  /* Second RUN group of the same PROC GPLOT step. */
  FOOTNOTE2 j=l '         Appointment Book' j=r 'GPLVRBL2(b) ';

  SYMBOL1 color=green interpol=spline width=2 value=triangle height=3;
  SYMBOL2 color=blue  interpol=spline width=2 value=circle   height=3;
  SYMBOL3 color=red   interpol=spline width=2 value=square   height=3;

  AXIS1 label=none
        value=('JAN' 'FEB' 'MAR' 'APR' 'MAY' 'JUN'
               'JUL' 'AUG' 'SEP' 'OCT' 'NOV' 'DEC')
        order = (1 to 12 by 1)
        offset=(2)
        width=3;
  AXIS2 label=('Degrees' justify=right 'Fahrenheit')
        order=(0 to 100 by 10)
        width=3;

  LEGEND1 label=none value=(tick=1 'Minneapolis');

  PLOT faren*month=city /
       haxis=axis1 hminor=0
       vaxis=axis2 vminor=1
       caxis=red legend=legend1;
RUN;
QUIT;
TITLE; FOOTNOTE;

/* GPLOT example: two vertical axes and a needle plot.  PLOT draws the
   centigrade values against the left axis, PLOT2 the Fahrenheit values
   against the right axis. */
GOPTIONS reset=global gunit=pct border cback=white
         colors=(black blue green red)
         ftext=swiss ftitle=swissb htitle=6 htext=3;

/* MONTH is read starting at column 10 and F2 (Fahrenheit temperature for
   Minneapolis) starting at column 23; C2 is the same temperature converted
   to centigrade. */
DATA samp.minntemp;
   INPUT @10 month
         @23 f2;
   c2 = (f2-32)/1.8;
   OUTPUT;
   DATALINES;
01JAN83  1    1   40.5  12.2  52.1 
01FEB83  2    1   42.2  16.5  55.1 
01MAR83  3    2   49.2  28.3  59.7 
01APR83  4    2   59.5  45.1  67.7 
01MAY83  5    2   67.4  57.1  76.3 
01JUN83  6    3   74.4  66.9  84.6 
01JUL83  7    3   77.5  71.9  91.2 
01AUG83  8    3   76.5  70.2  89.1 
01SEP83  9    4   70.6  60.0  83.8 
01OCT83  10   4   60.2  50.0  72.2 
01NOV83  11   4   50.0  32.4  59.8 
01DEC83  12   1   41.2  18.6  52.5 
;
RUN;

TITLE1 'Average Monthly Temp for Minneapolis';
FOOTNOTE1 j=l ' Source: 1984 American Express';
FOOTNOTE2 j=l '         Appointment Book' j=r 'GPLSCVL1 ';

SYMBOL1 interpol=needle ci=blue cv=red width=3 value=star height=3;
SYMBOL2 interpol=none value=none;

/* Month axis. */
AXIS1 label=none
      value=(h=3 'JAN' 'FEB' 'MAR' 'APR' 'MAY' 'JUN'
                 'JUL' 'AUG' 'SEP' 'OCT' 'NOV' 'DEC')
      order=(1 to 12 by 1) offset=(2) width=3;
/* Centigrade axis (used by PLOT). */
AXIS2 label=(h=3 'Degrees' justify=right ' Centigrade')
      order=(-20 to 30 by 10) width=3 value=(h=3);
/* Fahrenheit axis (used by PLOT2). */
AXIS3 label=(h=3 'Degrees' justify=left 'Fahrenheit')
      order=(-4 to 86 by 18) width=3 value=(h=3);

PROC GPLOT data=samp.minntemp;
   PLOT  c2*month / frame caxis=red
                    haxis=axis1 hminor=0
                    vaxis=axis2 vminor=1;
   PLOT2 f2*month / caxis=red
                    vaxis=axis3 vminor=1;
RUN;
QUIT;
TITLE; FOOTNOTE;

/* GPLOT example: stock data with the areas between the lines filled
   (AREAS= option on the PLOT statement). */
goptions reset=global gunit=pct border cback=white 
         colors=(blue red) ctext=black 
         ftext=swiss ftitle=swissb htitle=6 htext=4; 
 
title1 'Dow Jones Yearly Highs and Lows'; 
footnote1 h=3 j=l ' Source: 1997 World Almanac' 
         j=r 'GPLFILL1 '; 
 
symbol1 interpol=join; 
 
/* Horizontal axis: years. */
axis1 order=(1955 to 1995 by 5) offset=(2,2) 
      label=none 
      major=(height=2) 
      minor=(height=1); 
/* Vertical axis: index value. */
axis2 order=(0 to 6000 by 1000) offset=(0,0) 
      label=none 
      major=(height=2) 
      minor=(height=1); 
 
proc gplot data=samp.stocks; 
   plot DowJonesLow*year DowJonesHigh*year / overlay 
                             haxis=axis1 hminor=4 
                             vaxis=axis2 vminor=1 
                             caxis=black 
                             areas=2; 
run; 
quit; 
/* Clear the title and footnote like the other examples do; the next plot
   does not reset them and would otherwise show the Dow Jones text. */
title;footnote;



/* Plot: scatter plot with a regression line (I=RLCLI95 interpolation). */
PROC GPLOT data=samp.gpa;
  SYMBOL v=star i=rlcli95;
  PLOT satv*satm;
RUN;

/* Plot: regression and smoothing on simulated data.
   CURVES holds 30 points with y = 10 + 2*x + noise; the same points are
   then drawn with three different SYMBOL interpolation methods. */
DATA curves;
  n = 30;
  DO i = 1 TO n;
    x = normal(0)*10;
    y = 10 + 2*x + normal(0);
    OUTPUT;
  END;
  KEEP x y;
RUN;

PROC SORT data=curves;
  BY x;
RUN;

TITLE 'Spline Smooth';
PROC GPLOT data=curves;
  SYMBOL v=star i=spline;
  PLOT y*x;
RUN;

TITLE 'i=sm70 Smooth';
PROC GPLOT data=curves;
  SYMBOL v=star i=sm70;
  PLOT y*x;
RUN;

TITLE 'i=rlcli95 Smooth';
PROC GPLOT data=curves;
  SYMBOL v=star i=rlcli95;
  PLOT y*x;
RUN;
TITLE;

/* Plot: spline smoothing of a sparsely sampled sine curve.
   Y holds the sparse samples (every 70 degrees), Y2 the densely sampled
   reference curve (every degree); each row fills only one of the two. */
DATA sine;
  DO x = 0 TO 360 BY 70;
    y  = sin(x/180*3.1415926);
    y2 = .;
    OUTPUT;
  END;
  DO x = 0 TO 360;
    y  = .;
    y2 = sin(x/180*3.1415926);
    OUTPUT;
  END;
RUN;

PROC GPLOT data=sine;
  SYMBOL  i=spline v=star color=black;   /* sparse samples, spline-smoothed */
  SYMBOL2 i=join   v=none color=blue;    /* dense reference curve           */
  PLOT y*x=1
       y2*x=2 / overlay;
RUN;


/* GPLOT example: scatter plot, then the same plot with a regression line.
   Two RUN groups inside one PROC GPLOT step. */
GOPTIONS reset=global gunit=pct border cback=white
         colors=(black blue green red)
         ftext=swiss ftitle=swissb htitle=6 htext=4;

TITLE 'Study of Height vs Weight';
FOOTNOTE1 h=3 j=l ' Source: T. Lewis & L. R. Taylor';
FOOTNOTE2 h=3 j=l '         Introduction to Experimental Ecology'
          j=r 'GPLVRBL1(a) ';

PROC GPLOT data= samp.stats;
  PLOT height*weight;
RUN;

  /* Second RUN group: regression fit with confidence limits. */
  FOOTNOTE1;
  FOOTNOTE2 h=3 j=r 'GPLVRBL1(b) ';

  SYMBOL1 interpol=rcclm95   /* regression analysis with confidence limits */
          value=diamond      /* plot symbol                                */
          height=3           /* plot symbol height                         */
          cv=red             /* plot symbol color                          */
          ci=blue            /* regression line color                      */
          co=green           /* confidence limits color                    */
          width=2;           /* line width                                 */

  PLOT height*weight /
       haxis=45 to 155 by 10
       vaxis=48 to 78 by 6
       hminor=1
       regeqn;
RUN;
QUIT;
TITLE; FOOTNOTE;



/* PROC PLOT: low-resolution character scatter plot. */
TITLE  'High Dow Jones Values';
TITLE2 'from 1954 to 1998';
PROC PLOT data=samp.stocks;
   PLOT dowjoneshigh*year='*';
RUN;
QUIT;

/* PROC PLOT: low-resolution character plot with two curves overlaid
   ('*' for the highs, 'o' for the lows). */
TITLE  'Plot of Highs and Lows';
TITLE2 'for the Dow Jones Industrial Average';
PROC PLOT data=samp.stocks;
   PLOT dowjoneshigh*year='*'
        dowjoneslow*year='o'
        / overlay box;
RUN;
QUIT;



/* GPLOT example: bubble plot, a scatter-plot variant in which the bubble
   size represents a third dimension.
   The data are average salaries for kinds of engineers.
   eng: kind of engineer.  dollars: salary.  num: number of people.
 */
data samp.jobs; 
   length eng $5; 
   input eng dollars num; 
   datalines; 
Civil 27308 73273 
Aero  29844 70192 
Elec  22920 89382 
Mech  32816 19601 
Chem  28116 25541 
Petro 18444 34833 
; 
run;
/* GOPTIONS: RESET=GLOBAL resets all plotting settings such as colors,
   plot symbols, line types and fill patterns;
   GUNIT sets the default unit of numeric values in graphics options,
   BORDER draws a border around the graphics area,
   CBACK is the background color,
   COLORS is the list from which colors for symbols and lines are taken,
   FTITLE sets the title font,
   FTEXT sets the font of the other text,
   HTITLE sets the title height; the unit is percent here, so the title
          height is 6% of the graph height,
   HTEXT sets the height of the other text.
   The COLORS= list used to contain the misspelled name "greeen"; it is now
   "green", matching the other examples.
 */
goptions reset=global gunit=pct border cback=white 
         colors=(black blue green red) 
         ftitle=swissb ftext=swiss htitle=6 htext=4; 
title1 'Member Profile'; 
title2 'Salaries and Number of Member Engineers'; 
footnote h=3 j=r 'GPLBUBL1 '; 
axis1 offset=(5,5);
/* AXIS1: offset of the axis ends, in percent.
   The BUBBLE statement below uses HAXIS= to draw the horizontal axis
   according to AXIS1.
   Note that the horizontal axis of this plot is a categorical variable.
 */
proc gplot data=samp.jobs; 
   format dollars dollar9.; 
   bubble dollars*eng=num / haxis=axis1; 
run; 
quit; 

/* GPLOT example: bubble plot with labeled bubbles. */
GOPTIONS reset=global gunit=pct border cback=white
         colors=(black blue green red)
         ftitle=swissb ftext=swiss htitle=6 htext=3;
TITLE1 'Member Profile';
TITLE2 h=4 'Salaries and Number of Member Engineers';
FOOTNOTE h=3 j=r 'GPLBUBL2';
/* AXIS1 and AXIS2:
   the BUBBLE statement below uses HAXIS= to draw the horizontal axis
   according to AXIS1 and VAXIS= to draw the vertical axis according to
   AXIS2.  Note that the horizontal axis is a categorical variable.
   WIDTH=      axis line thickness.
   LABEL=NONE  no axis label (variable name).
   MAJOR=(...) settings for the major tick marks,
   MINOR=(...) settings for the minor tick marks,
   VALUE=(...) settings for the tick values.
 */
AXIS1 label=none offset=(5,5) width=3 value=(height=4);
AXIS2 order=(0 to 40000 by 10000)
      label=none
      major=(height=1.5) minor=(height=1)
      width=3
      value=(height=4);
/* BUBBLE statement options:
   BCOLOR= bubble color,
   BLABEL  prints the value that each bubble represents,
   BFONT=  font of those values,
   BSIZE=  scaling factor for the bubble size,
   CAXIS=  axis color.
 */
PROC GPLOT data=samp.jobs;
   FORMAT dollars dollar9. num comma7.0;
   BUBBLE dollars*eng=num /
          haxis=axis1
          vaxis=axis2 vminor=1
          bcolor=red
          blabel bfont=swissi
          bsize=12
          caxis=blue;
RUN;
QUIT;


/* GPLOT example: bubble plot with a second vertical axis.  JOBS2 adds YEN
   (DOLLARS times 125), which BUBBLE2 plots against the right-hand axis. */
GOPTIONS reset=global gunit=pct border cback=white
         colors=(black blue green red)
         ftext=swiss ftitle=swissb htitle=6 htext=3;

DATA jobs2;
   SET samp.jobs;
   yen = dollars*125;
RUN;

TITLE1 'Member Profile';
TITLE2 h=4 'Salaries and Number of Member Engineers';
FOOTNOTE j=r 'GPLAXIS1 ';

AXIS1 offset=(5,5) label=none width=3 value=(h=4);

PROC GPLOT data= jobs2;
   FORMAT dollars dollar7. num yen comma9.0;
   BUBBLE dollars*eng=num /
          haxis=axis1
          vaxis=10000 to 40000 by 10000
          hminor=0 vminor=1
          blabel bfont=swissi
          bcolor=red bsize=12
          caxis=blue;
   BUBBLE2 yen*eng=num /
          vaxis=1250000 to 5000000 by 1250000
          vminor=1
          bcolor=red bsize=12
          caxis=blue;
RUN;
QUIT;
TITLE; FOOTNOTE;



/******************************************/
/*          PROC GCHART examples          */
/******************************************/
/* GCHART example: histogram (vertical bar chart of GPA). */
PROC GCHART data=samp.gpa;
  VBAR gpa;
RUN;

/* Histogram drawn with PROC UNIVARIATE; NOPRINT suppresses the printed
   statistics. */
PROC UNIVARIATE data=samp.gpa noprint;
   VAR GPA;
   HISTOGRAM;
RUN;

/* GCHART example: grouped histograms with GROUP= (one group per SEX). */
PROC GCHART data=samp.gpa;
  VBAR gpa / group=sex;
RUN;

/* GCHART example: bar segments with SUBGROUP=.  Number of people per age
   group, each bar split by SEX.  Two RUN groups: HBAR, then HBAR3D. */
GOPTIONS reset=global gunit=pct border cback=white
         colors=(black blue green red) ftitle=swissb
         ftext=swiss htitle=6 htext=3.5;

TITLE1 'Fitness Program Participants';
FOOTNOTE1 h=3 j=r 'GCHBRMID(a) ';

AXIS1 label=('Number of People') minor=(number=1) offset=(0,0);

LEGEND1 label=none value=('Women' 'Men');

PATTERN1 color=cyan;
PATTERN2 color=blue;

PROC GCHART data=samp.fitness2;
   HBAR age / subgroup=sex
              legend=legend1
              autoref clipref
              coutline=black
              raxis=axis1;
RUN;

   /* Second RUN group: 3D bars with explicit midpoints. */
   FOOTNOTE h=3 j=r 'GCHBRMID(b) ';

   AXIS1 order=(0 to 20 by 2)
         label=('Number of People')
         minor=(number=1)
         offset=(0,0);

   AXIS2 label=('Age' j=r 'Group');

   HBAR3D age / midpoints=(20 30 40 50)
                freq freqlabel='Total in Group'
                subgroup=sex
                autoref
                maxis=axis2 raxis=axis1
                legend=legend1
                coutline=black
                cframe=grayaa;
RUN;
QUIT;
TITLE; FOOTNOTE;

/* GCHART example: histogram.
   Bars are grouped by age and segmented by sex (SUBGROUP=SEX).
   FREQ       makes each bar value a frequency.
   FREQLABEL= sets the label for the bar values.
   AUTOREF    draws reference lines.
   MAXIS=     names a user-defined axis for the midpoint (group) axis,
   RAXIS=     names a user-defined axis for the response (bar value) axis.
   COUTLINE=  sets the bar outline color.
*/
TITLE1 'Fitness Program Participants';
PROC GCHART data=samp.fitness2;
   AXIS1 order=(0 to 20 by 2)
         label=('Number of People')
         minor=(number=1)
         offset=(0,0);
   AXIS2 label=('Age ' j=r 'Group');
   HBAR3D age / midpoints=(20 30 40 50)
                freq freqlabel='Total in Group'
                subgroup=sex
                autoref
                maxis=axis2 raxis=axis1
                coutline=black;
RUN;
QUIT;
TITLE;


/* GCHART example: per-group sums with SUMVAR=.  Total SALES for each SITE,
   first as horizontal bars, then (second RUN group) as 3D vertical bars. */
GOPTIONS reset=global gunit=pct border cback=white
         colors=(black red blue green) ftitle=swissb
         ftext=swiss htitle=6 htext=3.5;
TITLE1 'Total Sales';
FOOTNOTE1 h=3 j=r 'GCHBRSUM(a) ';
PATTERN1 color=red;

PROC GCHART data=samp.totals;
   FORMAT sales dollar8.;
   HBAR site / sumvar=sales;
RUN;

   FOOTNOTE1 h=3 j=r 'GCHBRSUM(b) ';
   VBAR3D site / sumvar=sales coutline=black;
RUN;
QUIT;
TITLE; FOOTNOTE;

/* GCHART example: SUBGROUP=.  Total SALES for each SITE, each bar segmented
   by DEPT. */
GOPTIONS reset=global gunit=pct border cback=white
         colors=(black red green blue) ftitle=swissb
         ftext=swiss htitle=6 htext=3
         offshadow=(1.5,1.5);
TITLE 'Total Sales by Site';
FOOTNOTE h=3 j=r 'GCHBRGRP ';

AXIS1 label=none origin=(24,);
AXIS2 label=none
      order=(0 to 100000 by 20000)
      minor=(number=1)
      offset=(,0);

/* Axis definition that removes the vertical axis (defined here but not
   referenced by the chart below). */
AXIS3 noplane label=none value=none style=0 major=none minor=none;

LEGEND1 label=none
        shape=bar(3,3)
        cborder=black cblock=gray
        origin=(24,);

PATTERN1 color=lipk;
PATTERN2 color=cyan;
PATTERN3 color=lime;

PROC GCHART data=samp.totals;
   FORMAT quarter roman.;
   FORMAT sales dollar8.;
   VBAR3D site / sumvar=sales
                 subgroup=dept
                 inside=subpct outside=sum
                 width=9 space=4
                 cframe=gray
                 maxis=axis1 raxis=axis2
                 coutline=black
                 legend=legend1;
RUN;
QUIT;
TITLE; FOOTNOTE;

/* GCHART example: bars with error bars.
    TYPE=MEAN together with SUMVAR= makes the bar length the group mean,
    ERRORBAR=BARS and CLM=95 draw a confidence interval for that mean.
*/
goptions reset=global gunit=pct border cback=white 
         colors=(black blue green red) ftitle=swissb 
         ftext=swiss htitle=5 htext=3.5; 
title1 'Average Resting Heart Rate by Age'; 
footnote h=3 j=r 'GCHERRBR '; 
 
axis1 label=('Heart Rate' j=c 
             'Error Bar Confidence Limits: 95%') 
      minor=(number=1); 
axis2 label=('Age' j=r 'Group'); 
 
pattern1 color=cyan; 
 
proc gchart data=samp.fitness2; 
   hbar age / type=mean 
              freqlabel='Number in Group' 
              meanlabel='Mean  Heart Rate' 
              sumvar=heartrate 
              errorbar=bars 
              noframe 
              clm=95 
              midpoints=(20 30 40 50) 
              raxis=axis1 
              maxis=axis2 
              coutline=black; 
run; 
quit; 
/* Clear the title and footnote like the other examples do; the next chart
   does not reset them and would otherwise inherit the heart-rate text. */
title;footnote;


/* GCHART example: block chart with the BLOCK statement; well suited to
   grouping with GROUP=. */
GOPTIONS hpos=90 vpos=70;
PROC GCHART data=samp.houses;
  BLOCK style / group=bedrooms;
RUN;

/* GCHART example: block chart with SUMVAR= for sums.  Total SALES for each
   SITE. */
GOPTIONS reset=global gunit=pct border cback=white
         ctext=black colors=(blue green red)
         ftext=swiss ftitle=swissb
         htitle=6 htext=3.5;

TITLE 'Total Sales';
FOOTNOTE j=r 'GCHBKSUM ';

PROC GCHART data=samp.totals;
   FORMAT sales dollar8.;
   BLOCK site / sumvar=sales;
RUN;
QUIT;
TITLE; FOOTNOTE;

/* GCHART example: block chart with GROUP= and SUBGROUP=; SUMVAR= together
   with TYPE=MEAN gives the mean of SALES in each group. */
goptions reset=global gunit=pct border cback=white 
         colors=(blue green red) ctext=black 
         ftitle=swissb ftext=swiss htitle=4 htext=3; 

title1 'Average Sales by Department'; 
footnote1 h=3 j=r 'GCHBKGRP '; 
 
legend1 cborder=black 
        label=('Quarter:') 
        position=(middle left outside) 
        mode=protect 
        across=1 
        ; 
 
proc gchart data=samp.totals; 
   format quarter roman.; 
   format sales dollar8.; 
   label site='00'x dept='00'x; 
   block site / sumvar=sales 
                type=mean 
                midpoints='Sydney' 'Atlanta' 
                group=dept 
                subgroup=quarter 
                legend=legend1 
                noheading 
                coutline=black 
                caxis=black; 
run; 
quit; 
/* Clear the title and footnote like the other examples do; the pie charts
   that follow do not reset them and would otherwise inherit this text. */
title;footnote;


/* GCHART example: pie charts of SEX, flat and 3D. */
PROC GCHART data=samp.gpa;
  PIE   sex;
  PIE3D sex;
RUN;

/* The same two charts with TYPE=PERCENT. */
PROC GCHART data=samp.gpa;
  PIE   sex / type=percent;
  PIE3D sex / type=percent;
RUN;

/* GCHART example: pie chart with SUMVAR= (total SALES per SITE), followed
   in a second RUN group by a 3D pie with the 'Paris' slice exploded. */
GOPTIONS reset=global gunit=pct border cback=white
         colors=(blue green red)  ctext=black
         ftitle=swissb ftext=swiss htitle=6 htext=4;
TITLE 'Total Sales';
FOOTNOTE h=3 j=r 'GCHPISUM(a) ';

PROC GCHART data=samp.totals;
   FORMAT sales dollar8.;
   PIE site / sumvar=sales coutline=black;
RUN;

   FOOTNOTE h=3 j=r 'GCHPISUM(b) ';
   PIE3D site / sumvar=sales
                coutline=black
                explode='Paris';
RUN;
QUIT;

/* GCHART example: pie charts.
   The first chart has one slice per sex; the statistic is the mean of
   AEROBIC.  FILL=SOLID fills the slices with solid color.
   The second chart is grouped by sex (GROUP=SEX) and draws two pies side
   by side; each pie has one slice per EXERCISE value and the statistic is
   the mean heart rate.
   DISCRETE     treats EXERCISE as discrete: one slice per value;
   ACROSS=2     number of pies placed side by side when grouping;
   CTEXT=       text color;
   EXPLODE=4    pulls out the slice for EXERCISE = 4;
   SLICE=ARROW  connects each slice to its label with a line.
 */
TITLE; FOOTNOTE;
PROC GCHART data=samp.fitness2;
   PIE sex / type=mean
             sumvar=aerobic
             fill=solid;
   PIE3D exercise / type=mean
                    sumvar=heartrate
                    group=sex across=2
                    discrete
                    fill=solid
                    ctext=blue
                    explode=4
                    slice=arrow
                    noheading;
RUN;
QUIT;

/* GCHART example: pie slice labels.  SUMVAR= names the value each slice
   represents (only one observation per group). */
GOPTIONS reset=global gunit=pct border cback=white
         colors=(black blue green red cyan lime gray)
         ftitle=swissb ftext=swiss htitle=6 htext=4;

TITLE 'Sources of Energy, 1995';
FOOTNOTE h=3 j=r 'GCHLABEL ';

PROC GCHART data=samp.enprod (where=(year=1995));
   PIE engytype / sumvar=produced
                  other=0
                  midpoints='Coal' 'Geotherm' 'Petro'
                            'Biofuels' 'Gas' 'Nuclear' 'Hydro'
                  value=none
                  percent=arrow slice=arrow
                  cfill=cyan
                  noheading;
RUN;
QUIT;
TITLE; FOOTNOTE;

/* GCHART example: fill patterns and legend for a pie chart. */
GOPTIONS RESET=global
         GUNIT=pct BORDER CBACK=white
         COLORS=(black blue green red)
         FTITLE=swissb FTEXT=swiss
         HTITLE=5 HTEXT=4;

TITLE1 'Principle Sources of Energy: 1985';
TITLE2 FONT=swissb H=4.5  '(Amounts in quadrillion btu)';
FOOTNOTE H=3 J=R 'GCHLEGND ';

/* One PATTERN definition per energy type. */
PATTERN1 COLOR=black;       /* biofuels         */
PATTERN2 COLOR=blue;        /* coal             */
PATTERN3 COLOR=green;       /* gas              */
PATTERN4 COLOR=gray;        /* geothermal       */
PATTERN5 COLOR=lipk;        /* hydroelectric    */
PATTERN6 COLOR=lime;        /* nuclear          */
PATTERN7 COLOR=cyan;        /* petro            */
PATTERN8 COLOR=red;         /* other            */

LEGEND1 LABEL=NONE
        ACROSS=1
        POSITION=(left middle)
        OFFSET=(4,)
        SHAPE=BAR(4,4)
        VALUE=(COLOR=black)
        ORDER=('Coal' 'Gas' 'Petro'
               'Nuclear' 'Renewable');

PROC GCHART DATA=samp.enprod(WHERE=(year=1985));
   PIE engytype / SUMVAR=produced
                  DESCENDING
                  OTHER=5
                  OTHERLABEL='Renewable'
                  VALUE=INSIDE
                  LEGEND=legend1
                  COUTLINE=black
                  CTEXT=white
                  NOHEADING;
RUN;
QUIT;
TITLE;
FOOTNOTE;

/* GCHART example: detail pie chart.
   Total sales per SITE.
   Each SITE slice is subdivided by DEPT (DETAIL=DEPT).
   DETAIL_PERCENT= and DETAIL_SLICE= choose how the small sectors'
   values and labels are placed.
   DETAIL_VALUE=NONE suppresses the sales amount of each small sector.
*/
GOPTIONS RESET=global
         GUNIT=pct BORDER CBACK=white
         FTITLE=swissb FTEXT=swiss
         HTITLE=5 HTEXT=2.5;

TITLE1 'Site Sales By Dept (Details)';
FOOTNOTE1 H=3 J=R 'GCHDTPIE ';

PROC GCHART DATA=samp.totals;
   PIE site / SUMVAR=sales
              DETAIL=dept
              DETAIL_VALUE=NONE
              DETAIL_PERCENT=BEST
              DETAIL_SLICE=BEST
              LEGEND;
RUN;
QUIT;
TITLE;
FOOTNOTE;



/* GCHART example: donut chart with the DONUT statement.
   The sector angle represents the value.
   SITE defines the slices, SUBGROUP= defines the rings.
   The value is the sum of SALES.
   DONUTPCT= sets the radius of the hole as a percentage.
*/
GOPTIONS RESET=global
         GUNIT=pct BORDER CBACK=white
         COLORS=(blue green red) CTEXT=black
         FTITLE=swissb FTEXT=swiss
         HTITLE=6 HTEXT=4;

TITLE 'Sales by Site and Department';
FOOTNOTE  H=3 J=R 'GCHSBGRP ';

LEGEND1 LABEL=NONE
        ACROSS=1
        MODE=SHARE
        POSITION=(middle left)
        OFFSET=(5,)
        SHAPE=BAR(4,4);

PROC GCHART DATA=samp.totals;
   DONUT site / SUMVAR=sales
                SUBGROUP=dept
                DONUTPCT=30
                LABEL=('All' JUSTIFY=CENTER 'Quarters')
                LEGEND=legend1
                NOHEADING
                CTEXT=black
                COUTLINE=black;
RUN;
QUIT;
TITLE;
FOOTNOTE;

/* GCHART example: star (radar) chart with the STAR statement.
   The spine length represents the value:
   the sum of SALES for each SITE.
*/
GOPTIONS RESET=global
         GUNIT=pct BORDER CBACK=white
         COLORS=(blue green red) CTEXT=black
         FTITLE=swissb FTEXT=swiss
         HTITLE=6 HTEXT=4;
TITLE 'Total Sales';
FOOTNOTE H=3 J=R 'GCHSTSUM ';

PROC GCHART DATA=samp.totals;
   FORMAT sales dollar8.;
   STAR site / SUMVAR=sales;
RUN;
QUIT;
TITLE;
FOOTNOTE;

/* GCHART example: star chart of a discrete variable.
   Total number of rejected parts per date.
   Two RUN groups: a filled star, then an unconnected star with
   red outlines.
*/
GOPTIONS RESET=global
         GUNIT=pct BORDER CBACK=white
         COLORS=(black blue green red)
         FTITLE=swissb FTEXT=swiss
         HTITLE=6 HTEXT=3.5;
TITLE 'Rejected Parts';
FOOTNOTE H=3 J=R 'GCHDSCRT(a) ';

PROC GCHART DATA=samp.rejects;
   FORMAT date worddate3.;
   STAR date / SUMVAR=badparts
               DISCRETE
               FILL=S
               NOHEADING;
RUN;

  FOOTNOTE H=3 J=R 'GCHDSCRT(b) ';

  STAR date / SUMVAR=badparts
              DISCRETE
              NOCONNECT
              COUTLINE=red
              NOHEADING;
RUN;
QUIT;
TITLE;
FOOTNOTE;

/* Box plot examples.
   PROC BOXPLOT needs a group variable, so a constant G is added to
   draw a single box for all of GPA; the second plot draws one box
   per SEX after sorting by that variable. */
DATA new;
  SET samp.gpa;
  g = 1;
RUN;

PROC BOXPLOT DATA=new;
  PLOT gpa * g / BOXSTYLE=SCHEMATIC;
RUN;

PROC SORT DATA=new;
  BY sex;
RUN;

PROC BOXPLOT DATA=new;
  PLOT gpa * sex / BOXSTYLE=SCHEMATIC;
RUN;



/******************************************/
/*        曲面图和等值线图例子              */
/******************************************/
 
/* Plotting: surface plot and contour plot.
   DNORM2 holds a grid of the bivariate normal density with
   Var(x)=1, Var(y)=2 and correlation 0.5; only X, Y, Z are kept. */
DATA dnorm2;
  vary = 2;                    /* variance of y                    */
  sdy  = SQRT(vary);           /* standard deviation of y          */
  rho  = 0.5;                  /* correlation between x and y      */
  det  = vary*(1 - rho*rho);   /* determinant of covariance matrix */
  DO x = -3 TO 3 BY 0.3;
    DO y = -3*sdy TO 3*sdy BY 0.3*sdy;
      z = 1/(2*3.1415926*SQRT(det))*EXP(-0.5/det*
          (vary*x*x + y*y - 2*rho*sdy*x*y));
      OUTPUT;
    END;
  END;
  KEEP x y z;
RUN;

PROC G3D DATA=dnorm2;
  PLOT y*x=z;
RUN;

PROC GCONTOUR DATA=dnorm2;
  PLOT y*x=z / AUTOLABEL NOLEGEND;
RUN;
TITLE;
FOOTNOTE;

/* G3D example: the "hat" surface, z = sin(sqrt(x**2 + y**2))
   on a 0.25-spaced grid over [-5, 5] x [-5, 5]. */
GOPTIONS RESET=global
         GUNIT=pct BORDER CBACK=white
         COLORS=(black blue green red)
         FTITLE=swissb FTEXT=swiss
         HTITLE=6 HTEXT=4;

DATA hat;
   DO x = -5 TO 5 BY 0.25;
      DO y = -5 TO 5 BY 0.25;
         z = SIN(SQRT(x*x+y*y));
         OUTPUT;
      END;
   END;
RUN;

TITLE 'Surface Plot of HAT Data Set';
FOOTNOTE J=R 'GTDSURFA';

PROC G3D DATA=hat;
   PLOT y*x=z;
RUN;
QUIT;
TITLE;
FOOTNOTE;
 

/* G3D example: the hat surface, rotated. */
TITLE 'Surface Plot of HAT Data Set';
FOOTNOTE J=R 'GTDROTAT';

PROC G3D DATA=hat;
   PLOT y*x=z / ROTATE=45
                GRID
                CTOP=red
                CBOTTOM=black
                ZMIN=-3
                ZMAX=1
                YTICKNUM=5
                ZTICKNUM=5;
RUN;
QUIT;
TITLE;
FOOTNOTE;
 
/* G3D example: the hat surface, tilted. */
TITLE 'Surface Plot of HAT Data Set';
FOOTNOTE J=R 'GTDTILT';

PROC G3D DATA=hat;
   PLOT y*x=z / TILT=15
                SIDE;
RUN;
QUIT;
TITLE;
FOOTNOTE;

/* G3D example: 3-D scatter plot of the IRIS data. */
TITLE1 'Iris Species Classification';
TITLE2 'Physical Measurement';
TITLE3 'Source: Fisher (1936) Iris Data';
FOOTNOTE1 J=L '  Petallen: Petal Length in mm.'
          J=R 'Sepallen: Sepal Length in mm.   ';
FOOTNOTE2 J=L '  Petalwid: Petal Width in mm.'
          J=R 'Sepal Width not shown          ';
FOOTNOTE3 J=R 'GTDSCATR';

PROC G3D DATA=samp.iris;
   SCATTER petallen*petalwid=sepallen;
RUN;
QUIT;
TITLE;
FOOTNOTE;

/* G3D example: 3-D scatter plot of the IRIS data with a different
   symbol and color per species.  IRIS2 adds SHAPEVAL and COLORVAL,
   which the SCATTER statement reads through SHAPE= and COLOR=. */
DATA iris2;
   SET samp.iris;
   LENGTH species $12. colorval $8. shapeval $8.;
   SELECT (species);
      WHEN ('Setosa') DO;
         shapeval='club';
         colorval='blue';
      END;
      WHEN ('Versicolor') DO;
         shapeval='diamond';
         colorval='red';
      END;
      WHEN ('Virginica') DO;
         shapeval='spade';
         colorval='green';
      END;
      OTHERWISE;   /* any other species keeps blank shape/color */
   END;
RUN;

TITLE1 'Iris Species Classification';
TITLE2 'Physical Measurement';
TITLE3 'Source: Fisher (1936) Iris Data';
FOOTNOTE1 J=L '  Petallen: Petal Length in mm.'
          J=R 'Petalwid: Petal Width in mm. ';
FOOTNOTE2 J=L '  Sepallen: Sepal Length in mm.'
          J=R 'Sepal Width not shown      ';
FOOTNOTE3 J=R 'GTDSHAPE(a)';

/* Plot (a): needles shown, species key written with NOTE. */
PROC G3D DATA=iris2;
   SCATTER petallen*petalwid=sepallen
         / SHAPE=shapeval
           COLOR=colorval;

   NOTE;
   NOTE J=R 'Species:   ' C=green 'Virginica       '
        J=R C=red 'Versicolor      '
        J=R C=blue 'Setosa         ';
RUN;

TITLE3;
FOOTNOTE1 J=L '  Source: Fisher (1936) Iris Data';
FOOTNOTE2 J=R 'GTDSHAPE(b)';

/* Plot (b): no needles, with grid and descriptive axis labels. */
PROC G3D DATA=iris2;
   SCATTER petallen*petalwid=sepallen
         / SHAPE=shapeval
           COLOR=colorval
           GRID
           NONEEDLE;

   LABEL petallen='Petal Length'
         petalwid='Petal Width'
         sepallen='Sepal Length';
RUN;
QUIT;
TITLE;
FOOTNOTE;

/* Plotting: Chinese fonts in titles, axis labels and text. */
GOPTIONS FTEXT="宋体" FTITLE="黑体" HTITLE=6 pct HTEXT=3 pct;
PROC GPLOT DATA=samp.class;
  TITLE "试验SAS图形的汉字功能";
  SYMBOL I=NONE V=SQUARE;
  LABEL weight = "体重" height="身高";
  PLOT weight * height;
RUN;



/******************************************/
/*         Analyst生成的程序例子           */
/******************************************/
/* Simple random sampling from a data set, without replacement.
   A view adds a uniform random key to every row of INDATA and
   orders the rows by that key; the first &nsub rows of the view
   are therefore a random sample.
   Fixes: the DATA step now reads the shuffled view (it used to read
   INDATA itself, i.e. always the first rows), the helper column
   _RANDOM is dropped, and the sample size is taken from a macro
   variable because OBS= needs a number, not a bare name. */
%let nsub = 10;   /* sample size -- adjust as needed */

proc sql;
  create view _tmpv as
    select *, ranuni(0) as _random
    from indata
    order by calculated _random;
quit;
data sampled;
  set _tmpv(obs=&nsub drop=_random);
run;
/* Remove the temporary view. */
proc datasets lib=work noprint;
  delete _tmpv /memtype=view;
quit;


/* Another way to sample without replacement: keep each
   observation independently with probability 0.1. */
DATA sampled;
  SET indata;
  IF RANUNI(0) <= 0.1;   /* subsetting IF: only these rows are output */
RUN;


/* Sampling with replacement.
   The NOBS= option of SET names a temporary variable that holds the
   number of observations; it is assigned at compile time, so it can
   be used before the SET statement executes.
   POINT= reads the observation with the given number.
   A STOP statement is required at the end of the DATA step.
 */
data sampled;
  /* draw 10 observations */
  do i=1 to 10;
    /* random observation number in 1..nobs */
    point = ceil(ranuni(0)*nobs);
    set indata point=point nobs=nobs;
    output;
  end;
  /* direct access never reaches end of file, so stop explicitly */
  stop;
run;


    
    
    
/******************************************/
/*         PROC DATASETS例子              */
/******************************************/
    
/* PROC DATASETS: list the data sets of a library.
   CONTENTS DATA=_ALL_ with NODS prints only the library directory
   (no per-data-set variable lists); DETAILS adds extra columns to
   that directory.
   Fix: the option was misspelled "wNODS". */
PROC DATASETS LIBRARY=samp 
    MEMTYPE=DATA NOLIST;
  CONTENTS DATA=_all_ DETAILS NODS;
RUN;QUIT;

/* PROC SQL: store the list of data sets of library SAMP in a
   data set, read from DICTIONARY.TABLES. */
proc sql;
  create table sampdir as
    select *
    from dictionary.tables
    where libname='SAMP';
quit;


/* PROC DATASETS: show the variable list of one data set and save
   it to VLIST. */
proc datasets library=samp memtype=data nolist;
  contents data=class out=vlist;
run;
quit;

/* PROC CONTENTS: list the variables of a data set, first printed
   only, then also saved to VLIST. */
proc contents data=samp.class;
run;

proc contents data=samp.class out=vlist;
run;


/* PROC DATASETS: rename data sets with CHANGE old=new.
   The second step renames them back so the library is unchanged. */
proc datasets library=samp memtype=data nolist;
  change c9501f=c9501nv
         c9501m=c9501nan;
run;
quit;

proc datasets library=samp memtype=data nolist;
  change c9501nv=c9501f
         c9501nan=c9501m;
run;
quit;

/* PROC DATASETS: deleting data sets and views.
   First create something to delete: copy CLASS and GPA from SAMP to
   WORK and create the view WORK.C9501V; then delete them.  A view is
   deleted with MEMTYPE=VIEW.
   Fix: the RUN statement inside PROC SQL was removed -- PROC SQL
   executes each statement immediately and RUN only produces a
   "has no effect" note in the log. */
PROC DATASETS NOLIST;
  COPY OUT=WORK IN=SAMP;
  SELECT class gpa;
RUN;QUIT;
PROC SQL;
  CREATE VIEW c9501v AS
    SELECT * 
    FROM samp.c9501 a, samp.bkmoney b
    WHERE a.name=b.name;
QUIT;
PROC DATASETS LIBRARY=work NOLIST;
  DELETE class gpa;
  DELETE c9501v / MEMTYPE=VIEW;
RUN;QUIT;

/* PROC DATASETS: copy a whole library. */
proc datasets nolist;
  copy in=samp out=work;
run;
quit;

/* PROC DATASETS: copy only the selected data sets. */
proc datasets nolist;
  copy in=samp out=work;
  select c9501 class gpa;
run;
quit;

/* PROC DATASETS: append observations to the end of a data set.
   WORK.C9501FM receives SAMP.C9501M and then SAMP.C9501F. */
proc datasets library=work memtype=data nolist;
  append base=c9501fm data=samp.c9501m;
  append base=c9501fm data=samp.c9501f;
run;
quit;

/* PROC DATASETS: change variable attributes.
   A fresh copy of CLASS is taken from SAMP, then NAME gets a label
   and WEIGHT gets the format 8.2. */
proc datasets library=work nolist;
  copy in=samp out=work;
  select class;
run;
  modify class;
  attrib name   label='姓名'
         weight format=8.2;
quit;

proc print data=class label;
run;

/* PROC DATASETS: rename variables.
   A fresh copy of CLASS is taken from SAMP, then HEIGHT and WEIGHT
   are renamed to H and W. */
proc datasets library=work nolist;
  copy in=samp out=work;
  select class;
run;
  modify class;
  rename height=h
         weight=w;
quit;

proc print data=class;
run;


/******************************************/
/*         PROC RANK例子                  */
/******************************************/
/* Rank from low to high; tied values get the average rank. */
proc rank data=samp.class out=rc;
  var age;
  ranks agerank;
run;

/* Rank from high to low; tied values get the lowest rank. */
proc rank data=samp.class out=rc
    ties=low descending;
  var age;
  ranks agerank;
run;
    

/******************************************/
/*     PROC STANDARD例子                  */
/******************************************/
/* Standardize AGE to mean 0 and standard deviation 1;
   the result is written to SC. */
proc standard data=samp.class out=sc
    mean=0 std=1;
  var age;
run;


/******************************************/
/*     PROC FORMAT例子                    */
/******************************************/
/* Numeric output format: codes 1/2 are displayed as text. */
PROC FORMAT;
  VALUE sexotf
    1='男'
    2='女';
RUN;

DATA sexd;
  INPUT sex;
  FORMAT sex sexotf.;
  DATALINES;
1 
2 
;
RUN;

PROC PRINT DATA=sexd;
RUN;

/* Character output format: codes 'F'/'M' are displayed as text. */
PROC FORMAT;
  VALUE $sexf
    'F'='女'
    'M'='男';
RUN;

DATA sexd;
  INPUT sex $;
  FORMAT sex $sexf.;
  DATALINES;
F
M
;
RUN;

PROC PRINT DATA=sexd;
RUN;

/* Character informat: input codes 1/2 are stored as text. */
PROC FORMAT;
  INVALUE $sexotinf
    1='男'
    2='女';
RUN;

DATA sexd;
  INPUT sex $sexotinf. ;
  DATALINES;
1 
2 
;
RUN;

PROC PRINT DATA=sexd;
RUN;

/* Numeric informat: input text is stored as the codes 1/2. */
PROC FORMAT;
  INVALUE sexinf
    '男'=1
    '女'=2;
RUN;

DATA sexd;
  INPUT sex sexinf. ;
  DATALINES;
男
女
;
RUN;

PROC PRINT DATA=sexd;
RUN;

/* Numeric informat mixing character ranges, numeric ranges, a
   value mapped to missing, and OTHER=_ERROR_ for everything else. */
PROC FORMAT;
  INVALUE trial
    'A'-'M'=1
    'N'-'Z'=2
    1-999,1001-1999,2001-2999=3
    9999=.
    OTHER=_ERROR_;
RUN;

/* List the formats stored in the FORMATS catalog of library WORK. */
PROC FORMAT FMTLIB;
RUN;

/* Export the formats stored in WORK.FORMATS to data set TFM
   and print it. */
PROC FORMAT CNTLOUT=tfm;
RUN;

PROC PRINT DATA=tfm;
RUN;



/******************************************/
/*          PROC REPORT例子               */
/******************************************/

/* 以BUSINESS数据集为例。包括国家、行业两个分类变量，
 * 公司名、雇用人数、销售额、利润等变量。
 */

/* Show all variables, one row per observation.
   Fix: the second step had "DATA==", which is a syntax error. */
proc report data=samp.business(obs=20) nowindows;
run;

PROC REPORT DATA=samp.c9501 NOWINDOWS;
RUN;

proc report data=samp.class nowindows;
run;

/* When only numeric variables are shown, the default output is
   their sums rather than the individual observations. */
PROC REPORT DATA=samp.business(KEEP=employs sales profits)
    NOWINDOWS;
RUN;

proc report data=samp.c9501(keep=math chinese)
    nowindows;
run;

PROC REPORT DATA=samp.class(KEEP=age height weight)
    NOWINDOWS;
RUN;

/* The COLUMN statement selects the variables to display. */
PROC REPORT DATA=samp.business(OBS=20) NOWINDOWS;
  column company industry profits;
RUN;

proc report data=samp.c9501 nowindows;
  column name sex math chinese;
run;

PROC REPORT DATA=samp.class NOWINDOWS;
  column name sex age height weight;
RUN;

/* The ORDER option of DEFINE marks a sort variable.
   With several sort variables the nesting follows their order in
   COLUMN, or the order in the data set when there is no COLUMN
   statement.
 */
proc report data=samp.class nowindows;
  column sex age name height weight;
  define sex / order;
  define age / order;
run;

/* ORDER= selects the rule used to sort the values:
   DATA      order of first appearance in the data;
   FORMATTED order of the formatted values (the default);
   FREQ      ascending frequency of the values;
   INTERNAL  order of the internal (unformatted) values.
   DESCENDING reverses the chosen order.
*/
proc report data=samp.class nowindows;
  column sex age name height weight;
  define sex / order order=freq descending;
  define age / order descending;
run;


/* The GROUP option of DEFINE summarizes by group; the other numeric
   variables are summed (the default statistic).
   Apart from the group variables no character variable may appear,
   otherwise the rows cannot be collapsed.
*/
PROC REPORT DATA=samp.class NOWINDOWS;
  column sex age height weight;
  DEFINE sex / group;
RUN;

PROC REPORT DATA=samp.class NOWINDOWS;
  column sex age height weight;
  DEFINE sex / group;
  DEFINE age / group;
RUN;

/* Group summary with GROUP; ANALYSIS MEAN makes the other
   variables show the within-group mean.
   Available statistics include N, NMISS, MEAN, STD, SUM, VAR, CV,
   PCTN, PCTSUM, MEDIAN, Q1, Q3, P1, P5, P10, P90, P95, P99, etc.
*/
proc report data=samp.class nowindows;
  title 'Mean height and weight in sex groups';
  column sex height weight;
  define sex    / group;
  define height / analysis mean;
  define weight / analysis mean;
run;

PROC REPORT DATA=samp.class NOWINDOWS;
  COLUMN sex age height weight;
  DEFINE sex    / group;
  DEFINE age    / group;
  DEFINE height / analysis mean;
  DEFINE weight / analysis mean;
RUN;

/* Group variables can also be given an ordering rule.
   Without one, the groups are sorted by their formatted values.
*/
proc report data=samp.class nowindows;
  column sex age height weight;
  define sex    / group order=freq descending;
  define age    / group;
  define height / analysis mean;
  define weight / analysis mean;
run;

/* Defining a new (computed) column: RATIO = weight / height. */
proc report data=samp.class nowindows;
  column name height weight ratio;
  define name   / order;
  define height / display;
  define weight / display;
  define ratio  / computed format=6.2;
  compute ratio;
    ratio = weight / height;
  endcomp;
run;

/* Writing the report to an output data set (OUT=) and printing it. */
proc report data=samp.class
    nowindows out=repd;
  column sex age height weight;
  define sex    / group;
  define age    / group;
  define height / analysis mean;
  define weight / analysis mean;
run;

proc print;
run;

/* FORMAT= in a DEFINE statement sets the output format of that
   column.
*/
proc report data=samp.class nowindows;
  column sex age height weight;
  define sex    / group;
  define age    / group;
  define height / analysis mean format=8.2;
  define weight / analysis mean format=8.2;
run;

/* SPACING= on PROC REPORT sets the number of blanks between columns.
   SPACING= on DEFINE sets the blanks to the left of that column.
   COLWIDTH= on PROC REPORT sets the common column width.
   WIDTH= on DEFINE sets the width of that column.
   CENTER, LEFT and RIGHT on DEFINE choose the alignment.
*/
proc report data=samp.class nowindows spacing=5;
  column name sex age;
  define name / right width=10;
  define sex  / width=2;
  define age  / width=2;
run;

/* A string among the DEFINE options is the column header
   (by default the variable label or name is used).
   Inside a header '/' starts a new line; writing the header as two
   strings also puts them on two lines.
   SPLIT= on PROC REPORT chooses the character that marks a line
   break in header strings.
*/
proc report data=samp.class nowindows ;
  column sex height;
  define sex    / group width=2 '性 别';
  define height / analysis mean
    format=8.2 '身高';
run;

proc report data=samp.class nowindows ;
  column sex height weight;
  define sex    / group width=2 '性 别';
  define height / analysis mean
    format=8.2 '身高/平均值';
  define weight / analysis mean
    format=8.2 '体重' '平均值';
run;

/* HEADLINE on PROC REPORT underlines the column headers;
   HEADSKIP leaves a blank line below them.
*/
proc report data=samp.class
    nowindows headskip headline;
  column sex height;
  define sex    / group width=2 '性 别';
  define height / analysis mean
    format=8.2 '身高';
run;

PROC REPORT DATA=samp.class NOWINDOWS
    HEADSKIP HEADLINE;
  TITLE 'Mean height and weight by sex';
  COLUMN sex height weight;
  DEFINE sex    / GROUP WIDTH=2 '性/别';
  DEFINE height / ANALYSIS mean '平均' '身高';
  DEFINE weight / ANALYSIS mean '平均' '体重';
RUN;

/* In COLUMN, parentheses that enclose a string and several items
   give those items a shared spanning header.
*/
proc report data=samp.class nowindows;
  column sex age ('平均值' height weight);
  define sex    / group;
  define age    / group;
  define height / analysis mean;
  define weight / analysis mean;
run;

/* Aliases (var=alias in COLUMN) allow several statistics of the
   same variable.
*/
proc report data=samp.class nowindows;
  column sex height height=htmin height=htmax;
  define sex    / group width=4 '性别';
  define height / analysis mean
    format=6.2 width=10 '平均身高';
  define htmin  / analysis min format=5.1 '最低';
  define htmax  / analysis max format=5.1 '最高';
run;

/* The comma form in COLUMN requests several statistics for one
   variable; they are referenced as height.min / height.max in the
   summary lines written after each SEX group.
*/
proc report data=samp.class nowindows;
  column sex height height,(MEAN MIN MAX);
  define sex    / order width=4 '性别';
  define height / analysis sum
    format=6.2 '身高';
  compute after sex;
    line @21 '最低身高:' height.min 5.1
      +3 '最高身高:' height.max 5.1;
    line ' ';
  endcomp;
run;


/* An ACROSS variable in DEFINE groups by column: every value gets
   its own column showing the frequency of that value.
   The special variable N is the number of observations in the group.
   Fix: the first step misspelled the option as "ACCROSS".
*/
PROC REPORT DATA=samp.class NOWINDOWS;
  COLUMN sex N;
  DEFINE sex / ACROSS WIDTH=2;
  DEFINE N / '总计';
RUN;
PROC REPORT DATA=samp.class NOWINDOWS;
  COLUMN age sex N;
  DEFINE age / GROUP;
  DEFINE sex / ACROSS WIDTH=2;
  DEFINE N / '总计';
RUN;

/* After an ACROSS variable in COLUMN, a comma followed by one or
   more analysis variables (in parentheses when several) summarizes
   those variables under each across value.
   The across variable and the analysis variables may also be written
   in the opposite order; the layout differs slightly.
*/
proc report data=samp.class nowindows;
  column sex, (height weight);
  define sex    / across width=2;
  define height / analysis mean
    format=8.2 '平均身高';
  define weight / analysis mean
    format=8.2 '平均体重';
run;

proc report data=samp.class nowindows;
  column (height weight), sex;
  define sex    / across width=2;
  define height / analysis mean
    format=8.2 '平均身高';
  define weight / analysis mean
    format=8.2 '平均体重';
run;

/* Inside a compute block, columns created under an ACROSS variable
   are referenced by position as _C1_, _C2_, ... instead of by name.
   Here _C1_ and _C3_ are the HEIGHT columns of the first and second
   SEX value.
*/
proc report data=samp.class nowindows;
  column sex, (height weight) ratio;
  define sex    / across width=2;
  define height / analysis mean
    format=8.2 '平均身高';
  define weight / analysis mean
    format=8.2 '平均体重';
  define ratio  / computed
    format=6.2 '男女平均身高比';
  compute ratio;
    ratio = _C3_ / _C1_;
  endcomp;
run;

/* The BREAK and RBREAK statements.
   BREAK AFTER names a group variable and adds extra output at the
   end of each of its groups; BREAK BEFORE adds it at the start.
   RBREAK AFTER adds extra output at the end of the whole report,
   RBREAK BEFORE at its beginning.
   SUMMARIZE shows the summary statistics of the analysis variables,
   SKIP      leaves a blank line after the extra output,
   OL        draws a line above the extra row,
   DOL       draws a double line above the extra row,
   SUPPRESS  hides the group value in the extra row.
*/
proc report data=samp.class nowindows;
  column sex age height weight;
  define sex    / group;
  define age    / group;
  define height / analysis mean format=8.2;
  define weight / analysis mean format=8.2;
  break after sex / ol summarize skip;
  rbreak after    / dol summarize skip;
run;

/* COMPUTE AFTER customizes the per-group summary and the summary
   for the whole table.
*/
PROC REPORT DATA=samp.class NOWINDOWS;
  COLUMN sex name height weight;
  DEFINE sex / ORDER;
  DEFINE name / DISPLAY;
  DEFINE height / ANALYSIS MEAN FORMAT=8.2;
  DEFINE weight / ANALYSIS MEAN FORMAT=8.2;
  /* Written after each SEX group: group means of height and weight. */
  COMPUTE AFTER sex;
    LINE @11 '平均身高(' sex $2. '): ' 
      @25 height.mean 6.2
      @41 '平均体重(' sex $2. '): ' 
      @55 weight.mean 6.2;
  ENDCOMP;
  /* Written once at the end of the report: overall means. */
  COMPUTE AFTER;
    LINE '';
    LINE @11 '总平均身高:   ' height.mean 6.2
      @41 '总平均体重:   ' weight.mean 6.2;
  ENDCOMP;
RUN;

/* NOPRINT hides variables or aliases that are needed only inside
   compute blocks.
*/
proc report data=samp.class nowindows;
  column sex height height=htmin height=htmax;
  define sex    / order width=4 '性别';
  define height / analysis sum
    format=6.2 '身高';
  define htmin  / analysis min noprint '最低';
  define htmax  / analysis max noprint '最高';
  compute after sex;
    line @21 '最低身高:' htmin 5.1
      +3 '最高身高:' htmax 5.1;
    line ' ';
  endcomp;
run;


/* PROC REPORT manual, example 1.
   Only COLUMN is used to choose the output columns,
   RBREAK requests the default summary for the whole table,
   and a FORMAT statement sets the column formats.
   NOWD is short for NOWINDOWS.
*/
DATA grocery;
  INPUT Sector $ Manager $ Department $ Sales @@;
  DATALINES;
se 1 np1 50 se 1 p1 100 se 1 np2 120 se 1 p2 80
se 2 np1 40 se 2 p1 300 se 2 np2 220 se 2 p2 70
nw 3 np1 60 nw 3 p1 600 nw 3 np2 420 nw 3 p2 30
nw 4 np1 45 nw 4 p1 250 nw 4 np2 230 nw 4 p2 73
nw 9 np1 45 nw 9 p1 205 nw 9 np2 420 nw 9 p2 76
sw 5 np1 53 sw 5 p1 130 sw 5 np2 120 sw 5 p2 50
sw 6 np1 40 sw 6 p1 350 sw 6 np2 225 sw 6 p2 80
ne 7 np1 90 ne 7 p1 190 ne 7 np2 420 ne 7 p2 86
ne 8 np1 200 ne 8 p1 300 ne 8 np2 420 ne 8 p2 125
;
RUN;

/* Display formats for the coded sector, manager and department. */
PROC FORMAT;
  VALUE $sctrfmt
    'se' = 'Southeast'
    'ne' = 'Northeast'
    'nw' = 'Northwest'
    'sw' = 'Southwest';
  VALUE $mgrfmt
    '1' = 'Smith'
    '2' = 'Jones'
    '3' = 'Reveiz'
    '4' = 'Brown'
    '5' = 'Taylor'
    '6' = 'Adams'
    '7' = 'Alomar'
    '8' = 'Andrews'
    '9' = 'Pelfrey';
  VALUE $deptfmt
    'np1' = 'Paper'
    'np2' = 'Canned'
    'p1' = 'Meat/Dairy'
    'p2' = 'Produce';
RUN;

PROC REPORT DATA=grocery NOWD;
  WHERE sector='se';
  TITLE 'Sales for the Southeast Sector';
  TITLE2 "for &sysdate";
  COLUMN manager department sales;
  FORMAT manager $mgrfmt. department $deptfmt.
    sales dollar11.2;
  RBREAK AFTER / SUMMARIZE DOL;
RUN;

/* PROC REPORT manual, example 2.
   MANAGER is an ORDER variable sorted by its formatted values.
   DEPARTMENT is an ORDER variable sorted by its internal values.
   BREAK AFTER manager gives the default summary for each manager.
   In COMPUTE AFTER a LINE statement adds a custom summary line at
   the end of the table.
*/
PROC REPORT DATA=grocery NOWD
    HEADLINE HEADSKIP
    COLWIDTH=10 SPACING=5;
  WHERE sector='se';
  TITLE 'Sales for the Southeast Sector';
  COLUMN manager department sales;
  DEFINE manager    / ORDER
    ORDER=FORMATTED FORMAT=$mgrfmt.;
  DEFINE department / ORDER
    ORDER=INTERNAL FORMAT=$deptfmt.;
  DEFINE sales      / ANALYSIS sum
    FORMAT=dollar7.2;
  BREAK AFTER manager / OL SUMMARIZE SKIP;
  COMPUTE AFTER;
    LINE 'Total sales for these stores were: '
      sales.sum dollar9.2;
  ENDCOMP;
RUN;

/* PROC REPORT manual, example 3.
   COLUMN defines two aliases, sales=salesmin and sales=salesmax, so
   that different statistics of the same variable can be computed in
   different columns.
   Both alias columns are hidden with the NOPRINT option of DEFINE
   (in a detail row each of them is the min/max of a single value and
    would equal SALES anyway, but the statistic must be declared so
    that the summary row can take the minimum or maximum);
   their values are used only in the custom summary lines.
   In a LINE statement, 53*'-' repeats a character 53 times.
*/
proc report data=grocery nowd headline headskip;
  column manager department sales
    sales=salesmin sales=salesmax;
  define manager / order order=formatted
    format=$mgrfmt. 'Manager';
  define department / order order=internal
    format=$deptfmt. 'Department'; 
  define sales / analysis sum format=dollar7.2 'Sales';
  /* hidden helper columns, referenced in the summary lines below */
  define salesmin / analysis min noprint;
  define salesmax / analysis max noprint;
  /* boxed custom summary at the end of the report */
  compute after;
    line ' ';
    line @7 53*'-';
    line @7 '| Departmental sales ranged from'
      salesmin dollar7.2 +1 'to' +1 salesmax dollar7.2
      '. |';
    line @7 53*'-';
  endcomp;
  where sector='se';
  title 'Sales for the Southeast Sector';
  title2 "for &sysdate";
run;

/* PROC REPORT manual, example 4.
   Grouped summary.
   A LINE statement writes a custom summary after the whole table;
   sales.sum is the sum of SALES.
   CALL DEFINE changes the output format of the SALES column for
   summary rows only.
*/
PROC REPORT DATA=grocery NOWD HEADLINE HEADSKIP;
  WHERE sector CONTAINS 'n';
  TITLE 'Sales Figures for Northern Sectors';
  COLUMN sector manager sales;
  DEFINE sector  / GROUP FORMAT=$sctrfmt. 'Sector';
  DEFINE manager / GROUP FORMAT=$mgrfmt. 'Manager';
  DEFINE sales   / ANALYSIS sum FORMAT=comma10.2 'Sales';
  BREAK AFTER sector / OL SUMMARIZE SUPPRESS SKIP;
  COMPUTE AFTER;
    LINE 'Combined sales for the northern sectors were'
      sales.sum dollar9.2 '.';
  ENDCOMP;
  COMPUTE sales;
    /* _BREAK_ is non-blank only on summary rows */
    IF _break_ NE ' ' THEN
      CALL DEFINE(_col_,"format","dollar11.2");
  ENDCOMP;
RUN;

/* PROC REPORT manual, example 5.
   DEPARTMENT is an ACROSS variable, i.e. a column-wise class
   variable; in COLUMN it is joined to SALES with a comma so that the
   sum of SALES is computed under every DEPARTMENT value.
   PERISH is a COMPUTED variable defined in a compute block.
   Columns created under the across variable are referenced by names
   such as _C3_ and _C4_.
*/
proc report data=grocery nowd headline headskip split='*';
  column sector manager department,sales perish;
  define sector / group format=$sctrfmt. 'Sector' '';
  define manager / group format=$mgrfmt. 'Manager* ';
  define department / across format=$deptfmt. '_Department_';
  define sales / analysis sum format=dollar11.2 ' ';
  define perish / computed format=dollar11.2 'Perishable*Total';
  break after manager / skip;
  /* perishable total = sum of the two department columns */
  compute perish;
    perish=sum(_c3_, _c4_);
  endcomp;
  /* boxed custom summary at the end of the report */
  compute after;
    line @4 57*'-';
    line @4 '| Combined sales for meat and dairy : '
      @46 _c3_ dollar11.2 ' |';
    line @4 '| Combined sales for produce : '
      @46 _c4_ dollar11.2 ' |';
    line @4 '|' @60 '|';
    line @4 '| Combined sales for all perishables: '
      @46 _c5_ dollar11.2 ' |';
    line @4 57*'-';
  endcomp;
  /* only perishable departments (p1, p2) in sectors containing 'n' */
  where sector contains 'n'
    and (department='p1' or department='p2');
  title 'Sales Figures for Perishables in Northern Sectors';
run;

/* PROC REPORT manual, example 6.
   The comma form in COLUMN computes several statistics for one
   variable.
*/
PROC REPORT DATA=grocery NOWD HEADLINE HEADSKIP
    LS=66 PS=18;
  TITLE 'Sales Statistics for All Sectors';
  COLUMN sector manager sales,(Sum Min Max Range Mean Std);
  DEFINE manager / GROUP FORMAT=$mgrfmt. ID;
  DEFINE sector  / GROUP FORMAT=$sctrfmt.;
  DEFINE sales   / FORMAT=dollar11.2 ;
RUN;

/* PROC REPORT manual, example 7.
   OUTREPT= names a catalog entry in which the report definition is
   stored; REPORT= loads a stored report definition.
   NAMED displays each value as name=value and omits the column
   headers.
   WRAP continues an over-long row on the next line instead of
   splitting the output into two tables.
*/
PROC REPORT DATA=grocery NOWD NAMED WRAP LS=64 PS=36
    OUTREPT=sasuser.reports.namewrap;
  WHERE manager='1';
  TITLE "Sales Figures for Smith on &sysdate";
  COLUMN sector manager department sales;
  DEFINE sector     / FORMAT=$sctrfmt.;
  DEFINE manager    / FORMAT=$mgrfmt.;
  DEFINE department / FORMAT=$deptfmt.;
  DEFINE sales      / FORMAT=dollar11.2;
RUN;

/* Reuse the stored definition for another subset of the data. */
PROC REPORT DATA=grocery NOWD REPORT=sasuser.reports.namewrap;
  WHERE sector='sw';
  TITLE "Sales Figures for the Southwest Sector on &sysdate";
RUN;

/* PROC REPORT manual, example 8.
   Panels (side-by-side sections) save space.
   PANELS= sets the maximum number of panels allowed side by side.
   PSPACE= sets the gap between panels.
   Effective only for the LISTING destination.
*/
PROC REPORT DATA=grocery NOWD HEADLINE FORMCHAR(2)='~'
    PANELS=99 PSPACE=6 LS=64 PS=18;
  WHERE sector='nw' OR sector='sw';
  TITLE 'Sales for the Western Sectors';
  COLUMN manager department sales;
  DEFINE manager    / ORDER ORDER=FORMATTED FORMAT=$mgrfmt.;
  DEFINE department / ORDER ORDER=INTERNAL FORMAT=$deptfmt.;
  DEFINE sales      / FORMAT=dollar7.2;
  BREAK AFTER manager / SKIP;
RUN;

/* PROC REPORT manual, example 9.
   The PAGE option of BREAK AFTER MANAGER gives every manager a
   separate page.
   COMPUTE BEFORE _PAGE_ writes custom text at the top of each page.
   COMPUTE AFTER manager writes a custom performance comment for
   each manager.
*/
proc report data=grocery nowd headline headskip;
  title 'Sales for Individual Stores';
  column sector manager department sales Profit;
  define sector / group noprint;
  define manager / group noprint;
  define profit / computed format=dollar11.2;
  define sales / analysis sum format=dollar11.2;
  define department / group format=$deptfmt.;
  /* profit rate: 40% for departments np1/np2, 25% otherwise */
  compute profit;
    if department='np1' or department='np2'
      then profit=0.4*sales.sum;
    else profit=0.25*sales.sum;
  endcomp;
  /* page header: sector and manager (both are NOPRINT columns) */
  compute before _page_ / left;
    line sector $sctrfmt. ' Sector';
    line 'Store managed by ' manager $mgrfmt.;
    line ' ';
    line ' ';
    line ' ';
  endcomp;
  break after manager / ol summarize page;
  /* comment chosen from the manager's total sales */
  compute after manager;
    length text $ 35;
    if sales.sum lt 500 then
      text='Sales are below the target region.';
    else if sales.sum ge 500 and sales.sum lt 1000 then
      text='Sales are in the target region.';
    else if sales.sum ge 1000 then
      text='Sales exceeded goal!';
    line ' ';
    line text $35.;
  endcomp;
run;


/* PROC REPORT manual, example 10.
   The statistic PCTSUM is the cell's percentage of the total sum.
   In COLUMN, parentheses group a leading string with several items
   to form a spanning header.
   The FLOW option of DEFINE lets a character value wrap onto
   further lines within its column.
*/
PROC REPORT DATA=grocery NOWD HEADLINE;
  TITLE;
  COLUMN ('Individual Store Sales as a Percent of All Sales'
      sector manager sales,(sum pctsum) comment);
  DEFINE manager / GROUP FORMAT=$mgrfmt.;
  DEFINE sector  / GROUP FORMAT=$sctrfmt.;
  DEFINE sales   / FORMAT=dollar11.2 '';
  DEFINE sum     / FORMAT=dollar9.2 'Total Sales';
  DEFINE pctsum  / FORMAT=percent6. WIDTH=8 'Percent of Sales';
  DEFINE comment / COMPUTED WIDTH=20 FLOW '';
  COMPUTE comment / CHAR LENGTH=40;
    /* flag detail rows whose share of all sales exceeds 15% */
    IF sales.pctsum GT .15 AND _break_ = ' '
      THEN comment='Sales substantially above expectations.';
    ELSE comment=' ';
  ENDCOMP;
  RBREAK AFTER / OL SUMMARIZE;
RUN;


/* PROC REPORT manual, example 11.
   By default missing values are left out of the table.
   With the MISSING option, missing values of group and order
   variables form categories of their own.
*/
DATA grocmiss;
  INPUT Sector $ Manager $ Department $ Sales @@;
  DATALINES;
se 1 np1 50 . 1 p1 100 se . np2 120 se 1 p2 80
se 2 np1 40 se 2 p1 300 se 2 np2 220 se 2 p2 70
nw 3 np1 60 nw 3 p1 600 . 3 np2 420 nw 3 p2 30
nw 4 np1 45 nw 4 p1 250 nw 4 np2 230 nw 4 p2 73
nw 9 np1 45 nw 9 p1 205 nw 9 np2 420 nw 9 p2 76
sw 5 np1 53 sw 5 p1 130 sw 5 np2 120 sw 5 p2 50
. . np1 40 sw 6 p1 350 sw 6 np2 225 sw 6 p2 80
ne 7 np1 90 ne . p1 190 ne 7 np2 420 ne 7 p2 86
ne 8 np1 200 ne 8 p1 300 ne 8 np2 420 ne 8 p2 125
;

/* Without MISSING. */
PROC REPORT DATA=grocmiss NOWD HEADLINE;
  TITLE 'Summary Report for All Sectors and Managers';
  COLUMN sector manager N sales;
  DEFINE sector  / GROUP FORMAT=$sctrfmt.;
  DEFINE manager / GROUP FORMAT=$mgrfmt.;
  DEFINE sales   / FORMAT=dollar9.2;
  RBREAK AFTER / DOL SUMMARIZE;
RUN;

/* With MISSING. */
PROC REPORT DATA=grocmiss NOWD HEADLINE MISSING;
  COLUMN sector manager N sales;
  DEFINE sector  / GROUP FORMAT=$sctrfmt.;
  DEFINE manager / GROUP FORMAT=$mgrfmt.;
  DEFINE sales   / FORMAT=dollar9.2;
  RBREAK AFTER / DOL SUMMARIZE;
RUN;

/* PROC REPORT manual, example 12.
   OUT= stores the report result as a data set.  The first step
   prints nothing (all columns are NOPRINT) and keeps only managers
   whose summed sales exceed 1000; the second step reports on that
   data set.
*/
PROC REPORT DATA=grocery NOWD
    OUT=temp( WHERE=(sales GT 1000) );
  COLUMN manager sales;
  DEFINE manager / GROUP NOPRINT;
  DEFINE sales   / ANALYSIS sum NOPRINT;
RUN;

PROC REPORT DATA=temp BOX NOWD;
  TITLE 'Managers with Daily Sales';
  TITLE2 'of over';
  TITLE3 'One Thousand Dollars';
  COLUMN manager sales;
  DEFINE manager / GROUP FORMAT=$mgrfmt.;
  DEFINE sales   / ANALYSIS sum FORMAT=dollar11.2;
RUN;

/* PROC REPORT manual, example 13.
   With OUT= the computed variable is written to the output data
   set as well.
*/
TITLE;
PROC REPORT DATA=grocery NOWD OUT=profit;
  COLUMN sector manager department sales Profit;
  DEFINE profit / COMPUTED;
  /* Compute values for Profit: 40% for np1/np2, 25% otherwise. */
  COMPUTE profit;
    IF department='np1' OR department='np2'
      THEN profit=0.4*sales.sum;
    ELSE profit=0.25*sales.sum;
  ENDCOMP;
RUN;

/* Block chart of the summed profit per sector. */
PROC CHART DATA=profit;
  TITLE 'Sum of Profit by Sector';
  FORMAT sector $sctrfmt.;
  FORMAT profit dollar7.2;
  BLOCK sector / SUMVAR=profit;
RUN;


/* PROC REPORT manual, example 14.
   GROUP and ACROSS variables are grouped by their formatted values, so a
   format that shows several raw values as the same label puts them into
   the same group.
*/
proc format;
  value $perish
    'np1', 'np2' = 'Nonperishable'
    'p1',  'p2'  = 'Perishable';
run;

proc report data=grocery nowd headline headskip;
  title 'Sales Summary for All Stores';
  column manager department,sales sales;
  define manager    / group  order=formatted format=$mgrfmt.;
  define department / across order=formatted format=$perish. '';
  define sales      / analysis sum format=dollar9.2 width=13;
  /* Closing lines printed at the end of the report. */
  compute after;
    line ' ';
    line 'Total sales for these stores were: ' sales.sum dollar9.2;
  endcomp;
run;

/* PROC REPORT manual, example 15.
   STYLE(...) options on the PROC REPORT statement set one uniform style
   for each part of the table.
*/
ods html body='test15.htm';
ods pdf  file='test15.pdf';
ods rtf  file='test15.rtf';

proc report data=grocery nowd headline headskip
    style(report)  = [cellspacing=5 borderwidth=10 bordercolor=blue]
    style(header)  = [foreground=yellow font_style=italic font_size=6]
    style(column)  = [foreground=moderate brown font_face=helvetica font_size=4]
    style(lines)   = [foreground=white background=black
                      font_style=italic font_weight=bold font_size=5]
    style(summary) = [foreground=cx3e3d73 background=cxaeadd9
                      font_face=helvetica font_size=3 just=r];
  title 'Sales for the Southeast Sector';
  where sector='se';
  column manager department sales;
  define manager    / order order=formatted format=$mgrfmt.  'Manager';
  define department / order order=internal  format=$deptfmt. 'Department';
  break after manager / summarize;
  /* Text line after each manager's rows. */
  compute after manager;
    line 'Subtotal for ' manager $mgrfmt. 'is '  sales.sum dollar7.2 '.';
  endcomp;
  /* Text line at the end of the report. */
  compute after;
    line 'Total for all departments is: ' sales.sum dollar7.2 '.';
  endcomp;
run;

ods html close;
ods pdf  close;
ods rtf  close;

/* PROC REPORT manual, example 16.
   In addition to the uniform styles on the PROC REPORT statement:
     - DEFINE statements set a style for a single column or column header;
     - CALL DEFINE inside a compute block applies a style to a cell
       conditionally.
*/
ods html body='test16.htm';
ods pdf  file='test16.pdf';
ods rtf  file='test16.rtf';

proc report data=grocery nowd headline headskip
    style(report)  = [cellspacing=5 borderwidth=10 bordercolor=blue]
    style(header)  = [foreground=yellow font_style=italic font_size=6]
    style(column)  = [foreground=moderate brown font_face=helvetica font_size=4]
    style(lines)   = [foreground=white background=black
                      font_style=italic font_weight=bold font_size=5]
    style(summary) = [foreground=cx3e3d73 background=cxaeadd9
                      font_face=helvetica font_size=3 just=r];
  title 'Sales for the Southeast Sector';
  where sector='se';
  column manager department sales;
  define manager / order order=formatted format=$mgrfmt. 'Manager'
      style(header)=[foreground=white background=black];
  define department / order order=internal format=$deptfmt. 'Department'
      style(column)=[font_style=italic];
  break after manager / summarize;
  /* Subtotal line with its own style. */
  compute after manager /
      style=[font_style=roman font_size=3 font_weight=bold
             background=white foreground=black];
    line 'Subtotal for ' manager $mgrfmt. 'is ' sales.sum dollar7.2 '.';
  endcomp;
  /* Restyle SALES cells above 100 on rows where _BREAK_ is blank. */
  compute sales;
    if sales.sum>100 and _break_=' ' then do;
      call define(_col_, "style",
        "style=[background=yellow font_face=helvetica font_weight=bold]");
    end;
  endcomp;
  compute after;
    line 'Total for all departments is: ' sales.sum dollar7.2 '.';
  endcomp;
run;

ods html close;
ods pdf  close;
ods rtf  close;



    
/*************************************************
**************************************************
**                  第四章                       **
**************************************************
*************************************************/
/* One-sample t test of H0: mean(time) = 80, with ALPHA=0.1. */
data casetime;
  input time @@;
  datalines;
43  90  84  87  116   95  86   99   93  92
121 71  66  98   79  102  60  112  105  98
;
run;

title 'One-Sample t Test';
proc ttest data=casetime alpha=0.1 h0=80;
  var time;
run;
title;  /* reset the title afterwards */

/* Ten observations, each holding three uniform random numbers y1-y3.
   The row counter i stays in the data set; the inner index is dropped. */
data tran;
  array y(3);
  do i = 1 to 10;
    do k = 1 to dim(y);
      y(k) = uniform(0);
    end;
    output;
  end;
  drop k;
run;

/* Z test with known variance, and Wald test */
/* Save the mean, standard deviation and count of HEIGHT into _TMP_1. */
proc means data=samp.class
    mean std n;
  var height;
  output out=_tmp_1
    mean=mu std=sigma n=n;
run;

/* Z test: sigma is taken as known (sigma0 = 5), H0: mu = 65. */
data _null_;
  file print;
  set _tmp_1;
  mu0    = 65;
  sigma0 = 5;
  se     = sigma0 / sqrt(n);
  z      = (mu - mu0)/se;
  pvalue = 2*(1 - cdf('normal', abs(z)));
  put 'Z: ' Z 12.4
      '    Pr>|Z|: ' pvalue PVALUE.;
run;


/* Wald test: the estimated sigma is used as if it were the true sigma. */
data _null_;
  file print;
  set _tmp_1;
  mu0    = 65;
  sigma0 = sigma;  /* the only difference from the Z test above */
  se     = sigma0 / sqrt(n);
  z      = (mu - mu0)/se;
  pvalue = 2*(1 - cdf('normal', abs(z)));
  put 'Z: ' Z 12.4
      '    Pr>|Z|: ' pvalue PVALUE.;
run;

/* One-sample t test: is the mean handling time of a certain kind of
   legal case equal to 80 days? */
data time;
  input time @@;
  datalines;
43  90  84  87  116   95  86   99   93  92
121 71  66  98   79  102  60  112  105  98
;
run;

title 'One-Sample t Test';
proc ttest data=time alpha=0.1 h0=80;
  var time;
run;

/* The same question with PROC UNIVARIATE, applied to time - 80. */
data new;
  set time;
  y = time - 80;
run;
proc univariate data=new;
  var y;
run;
    
/* Normality test */
proc univariate data=samp.gpa normal;
  var gpa;
run;


/* t test for two independent samples */
proc ttest data=samp.gpa;
  var satm;
  class sex;
run;

/* When normality fails, compare the two independent samples
   with PROC NPAR1WAY */
proc npar1way data=samp.gpa wilcoxon;
  var gpa;
  class sex;
run;

/* Paired t test with PROC TTEST */
data pressure;
  input SBPbefore SBPafter @@;
  datalines;
120 128   124 131   130 131   118 127
140 132   128 125   140 141   135 137
126 118   130 132   126 129   127 135
;
run;

title 'Paired Comparison';
proc ttest data=pressure;
  paired SBPbefore*SBPafter;
run;

/* The same comparison with PROC UNIVARIATE on the differences. */
data new;
  set pressure;
  y = SBPbefore - SBPafter;
run;
proc univariate data=new;
  var y;
run;
title;


/* Comparing SATM with SATV in the GPA data set */
proc ttest data=samp.gpa;
  paired satm*satv;
run;

/* Paired t test done by computing the differences first */
data new;
  set samp.gpa;
  keep dmv;
  dmv = satm - satv;
run;
proc univariate data=new;
  var dmv;
run;

/* One-sample proportion test:
   test whether the proportion of female students in the GPA data set
   equals 0.6, and compute a confidence interval.
*/
proc freq data=samp.gpa;
  tables sex / binomial(p=0.6);
run;

/* One-sample proportion test:
   test whether the proportion of male students in the GPA data set
   equals 0.5; the EXACT BINOMIAL statement requests an exact p-value
   based on the binomial distribution.
*/
proc freq data=samp.gpa;
  tables sex / binomial(level='Male' p=0.5);
  exact binomial;
run;

/* The same test from pre-tabulated counts (WEIGHT statement). */
data scount;
  input sex $ count;
  datalines;
Female 145
Male    79
;
run;

proc freq data=scount;
  weight count;
  tables sex / binomial(level='Male' p=0.5);
  exact binomial;
run;

/* Two-sample proportion test:
   is the disease rate higher among smokers than among non-smokers?
   The cell counts are supplied through the WEIGHT statement.
*/
data bron;
  input smoke $ bron $ numcell;
  label smoke='吸烟'  bron='慢性支气管炎';
  cards;
吸烟  患病  43
吸烟  未患  162
不吸烟  患病  13
不吸烟  未患  121
;

proc freq data=bron;
  weight numcell;
  tables smoke*bron / nopct norow nocol fisher;
run;



/******************************************/
/*              回归分析例子               */
/******************************************/



/* Typical situations seen in residual plots */
/* Nonlinearity: the simulated mean is quadratic in x. */
data samp.regdiag1;
  icpt = 100;  lin = 2;  quad = 0.5;
  do x = -10 to 10;
    y = icpt + lin*x + quad*x*x + 2*normal(111);
    output;
  end;
  keep x y;
run;

/* An important variable is ignored: the two groups s='F' and s='M'
   share the slope but have different intercepts (100 and 200). */
data samp.regdiag2;
  slope = 2;
  do s = 'F', 'M';
    if s = 'F' then icpt = 100;
    else icpt = 200;
    do x = 10 to 30 by 0.3;
      y = icpt + slope*x + 5*normal(111);
      output;
    end;
  end;
  keep s x y;
run;

/* Outliers: a linear trend plus two hand-placed observations. */
data samp.regdiag3;
  icpt = 100;  slope = 2;
  do x = 10 to 30 by 0.3;
    y = icpt + slope*x + 5*normal(111);
    output;
  end;
  /* the two extra points */
  x = 12;  y = 100;  output;
  x = 25;  y = 180;  output;
  keep x y;
run;

/* Non-constant variance: the noise term is multiplied by 0.25*x. */
data samp.regdiag4;
  icpt = 100;  slope = 2;
  do x = 10 to 30 by 0.3;
    y = icpt + slope*x + 0.25*x*normal(111);
    output;
  end;
  keep x y;
run;

/* Serial correlation: each error is 0.8 times the previous error plus
   a new normal draw; i numbers the observations. */
data samp.regdiag5;
  icpt = 100;  slope = 2;
  i = 0;
  err = 0;
  do x = 10 to 30 by 0.3;
    i + 1;
    err = 0.8*err + normal(111);
    y = icpt + slope*x + err;
    output;
  end;
  keep i x y;
run;

/* Collinearity: x2 is close to 0.5*x1.  The coefficients b0-b2 are kept
   in the output data set because the step has no KEEP/DROP. */
data samp.regdiag6;
  retain b0 100 b1 1 b2 1;
  do x1 = 1 to 20;
    x2 = 0.5*x1 + 0.1*normal(0);
    y  = b0 + b1*x1 + b2*x2 + 2*normal(0);
    output;
  end;
run;

/* An important regressor is left out: two groups (sex) with different
   x ranges and different regression lines. */
data samp.mireg;
  sex = 'F';
  do x = 10 to 20;
    y = 100 - 2*x + normal(0);
    output;
  end;

  sex = 'M';
  do x = 31 to 40;
    y = 200 - x + normal(0);
    output;
  end;
run;

/* Scatter plot of the pooled data. */
proc gplot data=samp.mireg;
  symbol i=none v=star;
  plot y*x;
run;

/* Regression that ignores sex. */
proc reg data=samp.mireg;
  model y=x;
  plot y*x / pred;
run;
quit;

/* The appropriate analysis: fit the regression separately for each sex.
   The simulated data were stored as SAMP.MIREG, so the sort reads from
   that library and writes the sorted copy to WORK.MIREG (OUT=); the
   permanent data set keeps its original order. */
proc sort data=samp.mireg out=mireg;
  by sex;
run;
proc reg data=mireg;
  model y=x;
  by sex;
run;
quit;

/* Alternatively: one model with a 0/1 indicator s for sex='M' and the
   interaction xs = x*s, so intercept and slope may differ by sex. */
data mireg2;
  set mireg;
  if sex='M' then s=1;
  else s=0;
  xs = x*s;
run;
proc reg data=mireg2;
  model y = x s xs;
  plot residual.*predicted.;
run;
quit;

/* Joint test of the coefficients of s and xs, i.e. whether the models
   for the two sexes are the same. */
proc reg data=mireg2;
  model y = x s xs;
  test1:
      test s=0, xs=0;
run;
quit;
/* Alternatively, PROC MIXED could be used (no code is given here). */


/* Regression of weight on height and age, run interactively:
   each RUN group below adds to the same PROC REG session. */
proc reg data=samp.class;
  var weight height age;

  /* full model */
  model weight=height age;
run;

  /* stepwise selection */
  model weight=height age / selection=stepwise;
run;

  /* PRINT requests: CLI, then CLM */
  print cli;
run;
  print clm;
run;

  /* plots */
  plot weight * height / conf95;
run;
  plot residual. * predicted.;
run;
  plot rstudent. * obs.;
run;
quit;

/* ODS graphics for a regression */
ods graphics on;
proc reg data=samp.class;
  model weight=height;
run;
quit;
ods graphics off;

/* Joint tests on the regression coefficients */
proc reg data=samp.fitness;
  model oxygen = age weight runtime
                 rstpulse runpulse maxpulse;
  /* test1: the three pulse coefficients are equal */
  test1: test rstpulse=runpulse,
              rstpulse=maxpulse;
  /* test2: the coefficients of weight and rstpulse are both zero */
  test2: test weight=0, rstpulse=0;
quit;

/* US population growth data: quadratic polynomial regression.
   Each value read advances Year by 10 (first year 1790); Population is
   divided by 1000 after reading. */
data USPopulation;
  input Population @@;
  retain Year 1780;
  Year       = Year + 10;
  YearSq     = Year*Year;
  Population = Population/1000;
  datalines;
3929 5308 7239 9638 12866 17069
23191 31443 39818 50155
62947 75994 91972 105710 122775
131669 151325 179323 203211
226542 248710 281422
;
run;

symbol1 c=blue;
proc reg data=USPopulation;
  var YearSq;
  /* start with the linear model */
  model Population=Year / r cli clm vif;
  plot r.*p. / cframe=ligr;
run;
  /* then add the quadratic term to the same model */
  add YearSq;
  print;
  plot / cframe=ligr;
run;
quit;
/* Center the regressor to reduce the collinearity caused by
   higher-order and cross-product terms. */
data new;
  set USPopulation;
  Year   = Year - 1880;
  YearSq = Year*Year;
run;

/* Fit the centred quadratic model and keep the estimates in BETA. */
proc reg data=new outest=beta;
  model Population = Year YearSq / collin;
quit;
proc print data=beta;
run;

/* The DWPROB option tests for serial correlation. */
proc reg data=new;
  model Population = Year YearSq / dwProb;
quit;

/* Build a data set for the fitted curve from the saved coefficients,
   stack it under the original data, and draw an overlay plot. */
data gr;
  set beta(rename=(year=beta1 yearsq=beta2));
  population = .;
  do year = 1790 to 2000;
    t = year - 1880;     /* same centring as in the fitted model */
    fitted = intercept + beta1*t + beta2*t**2;
    output;
  end;
  drop t;
run;

data gr2;
  set USPopulation gr;
run;

proc gplot data=gr2;
  symbol1 i=none v=square c=black r=1;   /* observed values */
  symbol2 i=join v=none c=red w=3;       /* fitted curve */
  plot population*year=1 fitted*year=2 /overlay;
run;
quit;


/* Regression analysis: physical fitness indicators, variable selection */
*-------------------Data on Physical Fitness-------------------* 
| These measurements were made on men involved in a physical   | 
| fitness course at N.C.State Univ. The variables are Age      | 
| (years), Weight (kg), Oxygen intake rate (ml per kg body     | 
| weight per minute), time to run 1.5 miles (minutes), heart   | 
| rate while resting, heart rate while running (same time      | 
| Oxygen rate measured), and maximum heart rate recorded while | 
| running.                                                     | 
| ***Certain values of MaxPulse were changed for this analysis.| 
*--------------------------------------------------------------*; 
/* List input with a trailing @@: each data line holds several
   observations of seven values each. */
data fitness; 
    input Age Weight Oxygen RunTime RestPulse RunPulse MaxPulse @@; 
    datalines; 
   44 89.47 44.609 11.37 62 178 182   40 75.07 45.313 10.07 62 185 185 
   44 85.84 54.297  8.65 45 156 168   42 68.15 59.571  8.17 40 166 172 
   38 89.02 49.874  9.22 55 178 180   47 77.45 44.811 11.63 58 176 176 
   40 75.98 45.681 11.95 70 176 180   43 81.19 49.091 10.85 64 162 170 
   44 81.42 39.442 13.08 63 174 176   38 81.87 60.055  8.63 48 170 186 
   44 73.03 50.541 10.13 45 168 168   45 87.66 37.388 14.03 56 186 192 
   45 66.45 44.754 11.12 51 176 176   47 79.15 47.273 10.60 47 162 164 
   54 83.12 51.855 10.33 50 166 170   49 81.42 49.156  8.95 44 180 185 
   51 69.63 40.836 10.95 57 168 172   51 77.91 46.672 10.00 48 162 168 
   48 91.63 46.774 10.25 48 162 164   49 73.37 50.388 10.08 67 168 168 
   57 73.37 39.407 12.63 58 174 176   54 79.38 46.080 11.17 62 156 165 
   52 76.32 45.441  9.63 48 164 166   50 70.87 54.625  8.92 48 146 155 
   51 67.25 45.118 11.08 48 172 172   54 91.63 39.203 12.88 44 168 172 
   51 73.71 45.790 10.47 59 186 188   57 59.08 50.545  9.93 49 148 155 
   49 76.32 48.673  9.40 56 186 188   48 61.24 47.920 11.50 52 170 176 
   52 82.78 47.467 10.50 53 170 172 
; 
/* The same six-regressor model fitted with three selection methods. */
proc reg data=fitness;
  /* forward selection */
  model Oxygen = Age Weight RunTime RunPulse RestPulse MaxPulse
      / selection=forward;
  /* backward elimination */
  model Oxygen = Age Weight RunTime RunPulse RestPulse MaxPulse
      / selection=backward;
  /* MAXR selection */
  model Oxygen = Age Weight RunTime RunPulse RestPulse MaxPulse
      / selection=maxr;
run;

/* A simple plot from PROC REG */
/* Rewrite FITNESS in place to attach variable labels. */
data fitness;
  set fitness;
  label
    Age       = 'age(years)'
    Weight    = 'weight(kg)'
    Oxygen    = 'oxygen uptake(ml/kg/min)'
    RunTime   = '1.5 mile time(min)'
    RestPulse = 'rest pulse'
    RunPulse  = 'running pulse'
    MaxPulse  = 'maximum running pulse';
run;

proc reg data=fitness;
  model Oxygen=RunTime;
  plot Oxygen*RunTime / cframe=ligr;
run;

/* Controlling the look of PROC REG plots through graphics options */
goptions
    ctitle=black   htitle=3.5pct ftitle=swiss
    ctext =magenta htext =3.0pct ftext =swiss
    cback =ligr    border;
symbol1 v=circle c=red h=1 w=2;

title1 'Selection=Rsquare';
title2 'plot Rsquare versus the number of parameters P in '
       'each model';

proc reg data=fitness;
  model Oxygen = Age Weight RunTime RunPulse RestPulse MaxPulse
      / selection=rsquare noprint;
  /* R-square against the number of parameters, with extra statistics
     requested as plot options */
  plot rsq.*np.
      / aic bic edf gmsep jp np pc sbc sp
        haxis=2 to 7 by 1
        caxis=red cframe=white ctext=blue
        modellab='Full Model' modelht=2.4
        statht=2.4;
run;

/* QQ plot and PP plot for a regression */
/* Annotate data set: a 'move' to (0,0) followed by a 'draw' to (1,1)
   in coordinate system '2'. */
data annote1;
  length function color $8;
  retain ysys xsys '2' color 'black';
  do x = 0 to 1;
    y = x;
    if x = 0 then function = 'move';
    else function = 'draw';
    output;
  end;
run;

symbol1 c=blue;
proc reg data=fitness;
  title 'PP Plot';
  model Oxygen=RunTime / noprint;
  plot npp.*r.
      / annotate=annote1 nostat cframe=ligr
        modellab="'Best' Two-Parameter Model:";
run;
  title 'QQ Plot';
  plot r.*nqq.
      / noline mse cframe=ligr
        modellab="'Best' Two-Parameter Model:";
run;
quit;

/* Regression plot with prediction limits */
legend1
    position=(bottom left inside)
    across=1 cborder=red offset=(0,0)
    shape=symbol(3,1) label=none
    value=(height=.8);

symbol1 c=yellow v=- h=1;
symbol2 c=red;
symbol3 c=blue;
symbol4 c=blue;

title 'Prediction Intervals';
proc reg data=fitness;
  model Oxygen=RunTime / noprint;
  plot Oxygen*RunTime
      / pred nostat mse aic bic
        caxis=red ctext=blue cframe=ligr
        legend=legend1 modellab='         ';
run;
quit;


/* Regression of weight on height and age, estimated separately for
   girls and boys */
*------------Data on Age, Weight, and Height of Children-------* 
| Age (months), height (inches), and weight (pounds) were      | 
| recorded for a group of school children.                     | 
| From Lewis and Taylor (1967).                                | 
*--------------------------------------------------------------*; 
 
/* List input with a trailing @@: four observations per data line.
   All 'f' records come before the 'm' records, so the data are already
   grouped by sex.
   NOTE(review): age is read with the 3.1 informat, and the age values
   contain no decimal point, so e.g. 143 would be stored as 14.3 -
   confirm this is intended given the "months" wording above. */
data htwt; 
  input sex $ age :3.1 height weight @@; 
  datalines; 
   f 143 56.3  85.0 f 155 62.3 105.0 f 153 63.3 108.0 f 161 59.0  92.0 
   f 191 62.5 112.5 f 171 62.5 112.0 f 185 59.0 104.0 f 142 56.5  69.0 
   f 160 62.0  94.5 f 140 53.8  68.5 f 139 61.5 104.0 f 178 61.5 103.5 
   f 157 64.5 123.5 f 149 58.3  93.0 f 143 51.3  50.5 f 145 58.8  89.0 
   f 191 65.3 107.0 f 150 59.5  78.5 f 147 61.3 115.0 f 180 63.3 114.0 
   f 141 61.8  85.0 f 140 53.5  81.0 f 164 58.0  83.5 f 176 61.3 112.0 
   f 185 63.3 101.0 f 166 61.5 103.5 f 175 60.8  93.5 f 180 59.0 112.0 
   f 210 65.5 140.0 f 146 56.3  83.5 f 170 64.3  90.0 f 162 58.0  84.0 
   f 149 64.3 110.5 f 139 57.5  96.0 f 186 57.8  95.0 f 197 61.5 121.0 
   f 169 62.3  99.5 f 177 61.8 142.5 f 185 65.3 118.0 f 182 58.3 104.5 
   f 173 62.8 102.5 f 166 59.3  89.5 f 168 61.5  95.0 f 169 62.0  98.5 
   f 150 61.3  94.0 f 184 62.3 108.0 f 139 52.8  63.5 f 147 59.8  84.5 
   f 144 59.5  93.5 f 177 61.3 112.0 f 178 63.5 148.5 f 197 64.8 112.0 
   f 146 60.0 109.0 f 145 59.0  91.5 f 147 55.8  75.0 f 145 57.8  84.0 
   f 155 61.3 107.0 f 167 62.3  92.5 f 183 64.3 109.5 f 143 55.5  84.0 
   f 183 64.5 102.5 f 185 60.0 106.0 f 148 56.3  77.0 f 147 58.3 111.5 
   f 154 60.0 114.0 f 156 54.5  75.0 f 144 55.8  73.5 f 154 62.8  93.5 
   f 152 60.5 105.0 f 191 63.3 113.5 f 190 66.8 140.0 f 140 60.0  77.0 
   f 148 60.5  84.5 f 189 64.3 113.5 f 143 58.3  77.5 f 178 66.5 117.5 
   f 164 65.3  98.0 f 157 60.5 112.0 f 147 59.5 101.0 f 148 59.0  95.0 
   f 177 61.3  81.0 f 171 61.5  91.0 f 172 64.8 142.0 f 190 56.8  98.5 
   f 183 66.5 112.0 f 143 61.5 116.5 f 179 63.0  98.5 f 186 57.0  83.5 
   f 182 65.5 133.0 f 182 62.0  91.5 f 142 56.0  72.5 f 165 61.3 106.5 
   f 165 55.5  67.0 f 154 61.0 122.5 f 150 54.5  74.0 f 155 66.0 144.5 
   f 163 56.5  84.0 f 141 56.0  72.5 f 147 51.5  64.0 f 210 62.0 116.0 
   f 171 63.0  84.0 f 167 61.0  93.5 f 182 64.0 111.5 f 144 61.0  92.0 
   f 193 59.8 115.0 f 141 61.3  85.0 f 164 63.3 108.0 f 186 63.5 108.0 
   f 169 61.5  85.0 f 175 60.3  86.0 f 180 61.3 110.5 m 165 64.8  98.0 
   m 157 60.5 105.0 m 144 57.3  76.5 m 150 59.5  84.0 m 150 60.8 128.0 
   m 139 60.5  87.0 m 189 67.0 128.0 m 183 64.8 111.0 m 147 50.5  79.0 
   m 146 57.5  90.0 m 160 60.5  84.0 m 156 61.8 112.0 m 173 61.3  93.0 
   m 151 66.3 117.0 m 141 53.3  84.0 m 150 59.0  99.5 m 164 57.8  95.0 
   m 153 60.0  84.0 m 206 68.3 134.0 m 250 67.5 171.5 m 176 63.8  98.5 
   m 176 65.0 118.5 m 140 59.5  94.5 m 185 66.0 105.0 m 180 61.8 104.0 
   m 146 57.3  83.0 m 183 66.0 105.5 m 140 56.5  84.0 m 151 58.3  86.0 
   m 151 61.0  81.0 m 144 62.8  94.0 m 160 59.3  78.5 m 178 67.3 119.5 
   m 193 66.3 133.0 m 162 64.5 119.0 m 164 60.5  95.0 m 186 66.0 112.0 
   m 143 57.5  75.0 m 175 64.0  92.0 m 175 68.0 112.0 m 175 63.5  98.5 
   m 173 69.0 112.5 m 170 63.8 112.5 m 174 66.0 108.0 m 164 63.5 108.0 
   m 144 59.5  88.0 m 156 66.3 106.0 m 149 57.0  92.0 m 144 60.0 117.5 
   m 147 57.0  84.0 m 188 67.3 112.0 m 169 62.0 100.0 m 172 65.0 112.0 
   m 150 59.5  84.0 m 193 67.8 127.5 m 157 58.0  80.5 m 168 60.0  93.5 
   m 140 58.5  86.5 m 156 58.3  92.5 m 156 61.5 108.5 m 158 65.0 121.0 
   m 184 66.5 112.0 m 156 68.5 114.0 m 144 57.0  84.0 m 176 61.5  81.0 
   m 168 66.5 111.5 m 149 52.5  81.0 m 142 55.0  70.0 m 188 71.0 140.0 
   m 203 66.5 117.0 m 142 58.8  84.0 m 189 66.3 112.0 m 188 65.8 150.5 
   m 200 71.0 147.0 m 152 59.5 105.0 m 174 69.8 119.5 m 166 62.5  84.0 
   m 145 56.5  91.0 m 143 57.5 101.0 m 163 65.3 117.5 m 166 67.3 121.0 
   m 182 67.0 133.0 m 173 66.0 112.0 m 155 61.8  91.5 m 162 60.0 105.0 
   m 177 63.0 111.0 m 177 60.5 112.0 m 175 65.5 114.0 m 166 62.0  91.0 
   m 150 59.0  98.0 m 150 61.8 118.0 m 188 63.3 115.5 m 163 66.0 112.0 
   m 171 61.8 112.0 m 162 63.0  91.0 m 141 57.5  85.0 m 174 63.0 112.0 
   m 142 56.0  87.5 m 148 60.5 118.0 m 140 56.8  83.5 m 160 64.0 116.0 
   m 144 60.0  89.0 m 206 69.5 171.5 m 159 63.3 112.0 m 149 56.3  72.0 
   m 193 72.0 150.0 m 194 65.3 134.5 m 152 60.8  97.0 m 146 55.0  71.5 
   m 139 55.0  73.5 m 186 66.5 112.0 m 161 56.8  75.0 m 153 64.8 128.0 
   m 196 64.5  98.0 m 164 58.0  84.0 m 159 62.8  99.0 m 178 63.8 112.0 
   m 153 57.8  79.5 m 155 57.3  80.5 m 178 63.5 102.5 m 142 55.0  76.0 
   m 164 66.5 112.0 m 189 65.0 114.0 m 164 61.5 140.0 m 167 62.0 107.5 
   m 151 59.3  87.0 
; 
run; 

/* Two models per sex on the most recently created data set; the
   estimates go to EST1 and the SSCP matrices to SSCP1, and both output
   data sets are then printed. */
title '----- Data on age, weight, and height of children ------';
proc reg outest=est1 outsscp=sscp1 rsquare;
  by sex;
  eq1: model weight = height;
  eq2: model weight = height age;
run;
quit;

proc print data=sscp1;
  title2 'SSCP type data set';
run;

proc print data=est1;
  title2 'EST type data set';
run;



/* Regression with an indicator (binary categorical) regressor.
 * The response is the time an insurance company took to adopt a new
 * kind of insurance.  The regressors are company size and company type;
 * type is a 0/1 variable: 0 = mutual fund type, 1 = stock type.
 * Each type may have its own intercept and slope; the slope difference
 * is carried by the variable sizetype built in the DATA step.
 */
title 'Regression With Quantitative and Qualitative Variables';
data insurance;
  input time size type @@;
  sizetype = size*type;   /* interaction term */
  datalines;
   17 151 0   26  92 0   21 175 0   30  31 0   22 104 0
    0 277 0   12 210 0   19 120 0    4 290 0   16 238 0
   28 164 1   15 272 1   11 295 1   38  68 1   31  85 1
   21 224 1   20 166 1   13 305 1   30 124 1   14 246 1
;
run;

proc reg data=insurance;
  /* model with the interaction term */
  model time = size type sizetype;
run;
  /* remove the interaction term and print the refitted model */
  delete sizetype;
  print;
run;
  /* save residuals (r) and predicted values (p) */
  output out=out r=r p=p;
run;
quit;

/* Plot with the type value (0/1) as the plotting symbol. */
symbol1 v='0' c=blue   f=swissb;
symbol2 v='1' c=yellow f=swissb;
axis1 label=(angle=90);
proc gplot data=out;
  plot r*p=type    / nolegend vaxis=axis1 cframe=ligr;
  plot p*size=type / nolegend vaxis=axis1 cframe=ligr;
run;







/******************************************/
/*              方差分析例子               */
/******************************************/


/* One-way ANOVA of WEAR by BRAND. */
proc anova data=samp.veneer;
  class brand;
  model wear = brand;
run;
quit;


/* Nonparametric alternative (Wilcoxon scores). */
proc npar1way data=samp.veneer wilcoxon;
  var wear;
  class brand;
run;

/* One-way ANOVA followed by several MEANS requests, one per RUN group:
   plain means, then the T, BON and REGWQ options. */
proc anova data=samp.veneer;
  class brand;
  model wear = brand;
  means brand;
run;
  means brand / t;
run;
  means brand / bon;
run;
  means brand / regwq;
run;


/* RUBBER typed one observation per line: factor levels A (1-3) and
   B (1-4) and the response STREN, two replicates per cell.
   The original listing stopped after nine records with an ellipsis
   placeholder, which is read as an invalid data record.  The remaining
   records are filled in with the same 24 values that the
   DO-loop version of this data set reads. */
data rubber;
  input A B STREN;
  cards;
1 1 31
1 1 33
1 2 34
1 2 36
1 3 35
1 3 36
1 4 39
1 4 38
2 1 33
2 1 34
2 2 36
2 2 37
2 3 37
2 3 39
2 4 38
2 4 41
3 1 35
3 1 37
3 2 37
3 2 38
3 3 39
3 3 40
3 4 42
3 4 44
;
run;


**************************************************;
/* RUBBER read compactly: nested loops supply the factor levels a (1-3)
   and b (1-4) and the replicate number r (1-2); only the response
   values are typed, one line per level of a. */
data rubber;
  do a = 1 to 3;
    do b = 1 to 4;
      do r = 1 to 2;
        input stren @@;
        output;
      end;
    end;
  end;
  datalines;
31 33  34 36  35 36  39 38
33 34  36 37  37 39  38 41
35 37  37 38  39 40  42 44
;
run;


**************************************************;
/* Two-way ANOVA with the a*b interaction. */
proc anova data=rubber;
  class a b;
  model stren = a b a*b;
run;


**************************************************;
/* Main-effects model, then the means for each factor. */
proc anova data=rubber;
  class a b;
  model stren = a b;
run;
  means a b;
run;


**************************************************;
/* Eight runs with five two-level factors and the response PROD. */
data exp;
  input temp time conc manu mix  prod;
  datalines;
1 1 1 1 1  65
1 1 1 2 2  74
1 2 2 1 2  71
1 2 2 2 1  73
2 1 2 1 2  70
2 1 2 2 1  73
2 2 1 1 1  62
2 2 1 2 2  69
;
run;

/* Main-effects ANOVA on all five factors (the list temp--mix written
   out in full), with the T option on the means. */
proc anova data=exp;
  class temp time conc manu mix;
  model prod = temp time conc manu mix;
  means temp time conc manu mix / t;
run;

/* One-sample proportion Z test as a macro.
   Parameters: n  = sample size,
               n1 = count of the category of interest,
               p0 = proportion under the null hypothesis.
   Writes the sample proportion, the Z statistic and the two-sided
   normal p-value to the PRINT file. */
%macro percentzt(n,n1,p0);
  data _null_;
    file print;
    size  = &n.;
    hits  = &n1.;
    pnull = &p0.;
    phat  = hits/size;
    zstat = (phat - pnull)/sqrt(pnull * (1-pnull)/size);
    p2    = 2*(1 - probnorm(abs(zstat)));
    /* One-sided p-values, left disabled:
         prightsided = 1 - probnorm(Z);
         pleftsided  = probnorm(Z);       */
    put '===== Test for percent =====';
    put 'n = ' size  '  p =' phat;
    put 'p0 = ' pnull;
    put 'Z = ' zstat;
    put 'Pr > |Z|: ' p2 pvalue.;
    /* Disabled output lines for the one-sided p-values:
         put 'Pr > Z:  ' prightsided pvalue.;
         put 'Pr < Z:  ' pleftsided pvalue.;   */
  run;
%mend percentzt;
%percentzt(100,5,0.08);

/* One-sample proportion test */
proc freq data=samp.class;
  tables sex / binomial(level=2 p=0.5);
  exact binomial;
run;

/* The same kind of test from counts entered directly. */
data aa;
  input sex $ nums;
  datalines;
F 9
M 10
;
run;

proc freq data=aa;
  weight nums;
  tables sex / binomial(level=2 p=0.5);
  exact binomial;
run;

/* Counts (times) for six days; one-way chi-square test against equal
   proportions of 1/6, written out as 0.1666667 in TESTP=. */
data accident;
  input day times @@;
  datalines;
1 9  2 10  3 11  4 8  5 13  6 12
;
run;

proc freq data=accident;
  weight times;
  tables day / chisq
    testp=(0.1666667 0.1666667 0.1666667
           0.1666667 0.1666667 0.1666667);
run;

/* Macro version of the accident example: rebuilds ACCIDENT and runs
   the one-way chi-square test against equal proportions of 1/6.
   CARDS/DATALINES data lines are not allowed inside a macro definition,
   so the six (day, times) pairs are generated with assignments from a
   temporary array instead of being read from in-stream data. */
%macro check_accident;
  %local onesix;
  data accident;
    array cnt(6) _temporary_ (9 10 11 8 13 12);
    do day = 1 to 6;
      times = cnt(day);
      output;
    end;
  run;
  /* 1/6 computed once by the macro processor */
  %let onesix = %sysevalf(1/6);
  proc freq data=accident;
    tables day / chisq
      testp=(&onesix &onesix &onesix
             &onesix &onesix &onesix);
    weight times;
  run;
%mend check_accident;


**************************************************;
/* One record per student: student number (sno), sex and origin (from).
   NOTE(review): the last data line is a placeholder standing for the
   remaining student records.  Because it sits inside the CARDS block it
   is read as data, not as a comment - replace it with the real records
   before running this step. */
data class;
  input sno sex $ from $;
  label sex='性别' from='来源';
  cards;
1  男  本地
2  女  外地
3  男  外地
…………/* 所有学生的记录 */
;


**************************************************;
/* Two-way frequency table of origin by sex from the raw records. */
proc freq data=class;
  tables from * sex;
run;


**************************************************;
/* The same origin-by-sex table entered as cell counts (numcell). */
data classt;
  input from $ sex $ numcell;
  label sex='性别' from='来源';
  datalines;
本地  男  4
本地  女  6
外地  男  14
外地  女  7
;
run;



**************************************************;
/* Cross-tabulation with the counts supplied through WEIGHT. */
proc freq data=classt;
  weight numcell;
  tables from * sex;
run;


**************************************************;
/* Same table with the NOPCT, NOROW and NOCOL options. */
proc freq data=classt;
  weight numcell;
  tables from * sex / nopct norow nocol;
run;


**************************************************;
/* Smoking by chronic bronchitis, entered as cell counts; chi-square
   test with expected counts shown in the table. */
data bron;
  input smoke $ bron $ numcell;
  label smoke='吸烟'  bron='慢性支气管炎';
  datalines;
吸烟  患病  43
吸烟  未患  162
不吸烟  患病  13
不吸烟  未患  121
;

proc freq data=bron;
  weight numcell;
  tables smoke*bron / nopct norow nocol chisq expected;
run;

/* Two-sample proportion Z test as a macro.
   Parameters: n1, s1 = size and count for sample 1,
               n2, s2 = size and count for sample 2.
   The two-sided p-value uses the statistic with the pooled proportion;
   the one-sided p-values use the statistic with separate variances.
   Results are written to the PRINT file. */
%macro percent2z(n1,s1,n2,s2);
  data _null_;
    file print;
    n1 = &n1;
    s1 = &s1;
    n2 = &n2;
    s2 = &s2;
    hatp1 = s1/n1;
    hatp2 = s2/n2;
    hatp  = (s1+s2)/(n1+n2);
    /* pooled-variance statistic */
    Z2s = (hatp1 - hatp2) /
      sqrt(hatp*(1-hatp)*(1/n1 + 1/n2));
    /* separate-variance statistic */
    Z1s = (hatp1 - hatp2) /
      sqrt(hatp1*(1-hatp1)/n1
          +hatp2*(1-hatp2)/n2);
    ptwosided   = 2*(1 - probnorm(abs(Z2s)));
    prightsided = 1 - probnorm(Z1s);
    pleftsided  = probnorm(Z1s);
    put '===== Test for percent =====';
    put 'n1 = ' n1  '  s1 =' s1  '  p1=' hatp1;
    put 'n2 = ' n2  '  s2 =' s2  '  p2=' hatp2;
    put 'Pr > |Z|: ' ptwosided pvalue.;
    put 'Pr > Z:   ' prightsided pvalue.;
    put 'Pr < Z:   ' pleftsided pvalue.;
  run;
%mend percent2z;
%percent2z(100,10,100,8);

**************************************************;
/* Herd size by disease level, entered as cell counts (numcell). */
data cows;
  input herdsize  disease  numcell;
  label
    herdsize = '牛群大小'
    disease  = '患病程度';
  datalines;
1 0 9
1 1 5
1 2 9
2 0 18
2 1 4
2 2 19
3 0 11
3 1 88
3 2 136
;
run;


**************************************************;
/* Chi-square tests and association measures for the two-way table. */
proc freq data=cows;
  title '奶牛疾病数据分析';
  weight numcell;
  tables herdsize*disease / measures
      chisq nopercent nocol;
run;






/* City temperature data: CITY is read from columns 1-15 (so embedded
   blanks and commas are kept), then JANUARY and JULY follow as list
   input.  The data lines must keep their column alignment. */
DATA TEMPERAT;
   INPUT CITY $1-15 JANUARY JULY;
   CARDS;
MOBILE           10.7  27.6
PHOENIX          10.7  32.9
LITTLE ROCK       4.2  27.4
SACRAMENTO        7.3  24.0
DENVER           -1.2  22.8
HARTFORD         -4.0  22.6
WILMINGTON        0.0  24.3
WASHINGTON DC     2.0  25.9
JACKSONVILLE     12.6  27.2
MIAMI            19.6  27.9
ATLANTA           5.8  25.6
BOISE            -1.7  23.6
CHICAGO          -5.1  22.2
PEORIA           -4.6  23.9
INDIANAPOLIS     -2.3  23.9
DES MOINES       -7.0  23.9
WICHITA          -0.4  27.1
LOUISVILLE        0.7  24.9
NEW ORLEANS      11.6  27.7
PORTLAND, MAINE  -5.8  20.0
BALTIMORE         0.8  24.8
BOSTON           -1.6  22.9
DETROIT          -3.6  22.9
SAULT STE MARIE  -9.9  17.7
DULUTH          -13.1  18.7
MINNEAPOLIS     -11.0  22.2
JACKSON           8.4  27.6
KANSAS CITY      -2.3  26.0
ST LOUIS         -0.4  25.9
GREAT FALLS      -6.4  20.7
OMAHA            -5.2  25.1
RENO             -0.1  20.7
CONCORD          -6.3  20.9
ATLANTIC CITY     0.4  23.9
ALBUQUERQUE       1.8  25.9
ALBANY           -5.8  22.2
BUFFALO          -4.6  21.2
NEW YORK          0.1  24.8
CHARLOTTE         5.6  25.8
RALEIGH           4.7  25.3
BISMARCK        -13.2  21.6
CINCINNATI       -0.5  24.2
CLEVELAND        -2.8  21.9
COLUMBUS         -2.0  23.1
OKLAHOMA CITY     2.7  27.5
PORTLAND, OREG    3.4  19.5
PHILADELPHIA      0.2  24.9
PITTSBURGH       -2.2  22.2
PROVIDENCE       -2.0  22.3
COLUMBIA          7.4  27.3
SIOUX FALLS      -9.9  22.9
MEMPHIS           4.7  26.4
NASHVILLE         3.5  26.4
DALLAS            7.1  29.3
EL PASO           6.4  27.9
HOUSTON          11.2  28.5
SALT LAKE CITY   -2.2  24.8
BURLINGTON       -8.4  21.0
NORFOLK           4.7  25.7
RICHMOND          3.1  25.5
SPOKANE          -3.7  20.9
CHARLESTON, WV    1.4  23.9
MILWAUKEE        -7.0  21.1
CHEYENNE         -3.0  20.6
;
/* Principal components of JULY and JANUARY from the covariance matrix
   (COV option); OUT=PRIN saves the output data set.  No DATA= is given,
   so the most recently created data set (TEMPERAT) is analysed. */
PROC PRINCOMP COV OUT=PRIN;
  VAR JULY JANUARY;
RUN;


DATA CRIME;
   TITLE '各州每十万人的犯罪率';
   INPUT STATE $1-15 MURDER RAPE ROBBERY ASSAULT 
         BURGLARY LARCENY AUTO;
   CARDS;
ALABAMA        14.2 25.2  96.8 278.3 1135.5 1881.9 280.7
ALASKA         10.8 51.6  96.8 284.0 1331.7 3369.8 753.3
ARIZONA         9.5 34.2 138.2 312.3 2346.1 4467.4 439.5
ARKANSAS        8.8 27.6  83.2 203.4  972.6 1862.1 183.4
CALIFORNIA     11.5 49.4 287.0 358.0 2139.4 3499.8 663.5
COLORADO        6.3 42.0 170.7 292.9 1935.2 3903.2 477.1
CONNECTICUT     4.2 16.8 129.5 131.8 1346.0 2620.7 593.2
DELAWARE        6.0 24.9 157.0 194.2 1682.6 3678.4 467.0
FLORIDA        10.2 39.6 187.9 449.1 1859.9 3840.5 351.4
GEORGIA        11.7 31.1 140.5 256.5 1351.1 2170.2 297.9
HAWAII          7.2 25.5 128.0  64.1 1911.5 3920.4 489.4
IDAHO           5.5 19.4  39.6 172.5 1050.8 2599.6 237.6
ILLINOIS        9.9 21.8 211.3 209.0 1085.0 2828.5 528.6
INDIANA         7.4 26.5 123.2 153.5 1086.2 2498.7 377.4
IOWA            2.3 10.6  41.2  89.8  812.5 2685.1 219.9
KANSAS          6.6 22.0 100.7 180.5 1270.4 2739.3 244.3
KENTUCKY       10.1 19.1  81.1 123.3  872.2 1662.1 245.4
LOUISIANA      15.5 30.9 142.9 335.5 1165.5 2469.9 337.7
MAINE           2.4 13.5  38.7 170.0 1253.1 2350.7 246.9
MARYLAND        8.0 34.8 292.1 358.9 1400.0 3177.7 428.5
MASSACHUSETTS   3.1 20.8 169.1 231.6 1532.2 2311.3 1140.1
MICHIGAN        9.3 38.9 261.9 274.6 1522.7 3159.0 545.5
MINNESOTA       2.7 19.5  85.9  85.8 1134.7 2559.3 343.1
MISSISSIPPI    14.3 19.6  65.7 189.1  915.6 1239.9 144.4
MISSOURI        9.6 28.3 189.0 233.5 1318.3 2424.2 378.4
MONTANA         5.4 16.7  39.2 156.8  804.9 2773.2 309.2
NEBRASKA        3.9 18.1  64.7 112.7  760.0 2316.1 249.1
NEVADA         15.8 49.1 323.1 355.0 2453.1 4212.6 559.2
NEW HAMPSHIRE   3.2 10.7  23.2  76.0 1041.7 2343.9 293.4
NEW JERSEY      5.6 21.0 180.4 185.1 1435.8 2774.5 511.5
NEW MEXICO      8.8 39.1 109.6 343.4 1418.7 3008.6 259.5
NEW YORK       10.7 29.4 472.6 319.1 1728.0 2782.0 745.8
NORTH CAROLINA 10.6 17.0  61.3 318.3 1154.1 2037.8 192.1
NORTH DAKOTA    0.9  9.0  13.3  43.8  446.1 1843.0 144.7
OHIO            7.8 27.3 190.5 181.1 1216.0 2696.8 400.4
OKLAHOMA        8.6 29.2  73.8 205.0 1288.2 2228.1 326.8
OREGON          4.9 39.9 124.1 286.9 1636.4 3506.1 388.9
PENNSYLVANIA    5.6 19.0 130.3 128.0  877.5 1624.1 333.2
RHODE ISLAND    3.6 10.5  86.5 201.0 1489.5 2844.1 791.4
SOUTH CAROLINA 11.9 33.0 105.9 485.3 1613.6 2342.4 245.1
SOUTH DAKOTA    2.0 13.5  17.9 155.7  570.5 1704.4 147.5
TENNESSEE      10.1 29.7 145.8 203.9 1259.7 1776.5 314.0
TEXAS          13.3 33.8 152.4 208.2 1603.1 2988.7 397.6
UTAH            3.5 20.3  68.8 147.3 1171.6 3004.6 334.5
VERMONT         1.4 15.9  30.8 101.2 1348.2 2201.0 265.2
VIRGINIA        9.0 23.3  92.1 165.7  986.2 2521.2 226.7
WASHINGTON      4.3 39.6 106.2 224.8 1605.6 3386.9 360.3
WEST VIRGINIA   6.0 13.2  42.2  90.9  597.4 1341.7 163.3
WISCONSIN       2.8 12.9  52.2  63.7  846.9 2614.2 220.7
WYOMING         5.4 21.9  39.7 173.9  811.6 2772.2 282.0
;
/* Principal component analysis of the most recently created data set
 * (no DATA= is given, so PROC PRINCOMP uses _LAST_ -- presumably the
 * data set whose data lines end just above; confirm against the DATA step).
 * The component scores are written to CRIMCOMP.
 */
proc princomp out=crimcomp;
run;

/* Listing 1: observations ordered by the first principal component. */
proc sort data=crimcomp;
   by prin1;
run;

proc print data=crimcomp;
   id state;
   var prin1 prin2
       murder rape robbery assault burglary larceny auto;
   title2 '各州按第一主分量作为总犯罪率排列';
run;

/* Listing 2: the same columns, re-ordered by the second component. */
proc sort data=crimcomp;
   by prin2;
run;

proc print data=crimcomp;
   id state;
   var prin1 prin2
       murder rape robbery assault burglary larceny auto;
   title2 '各州按第二主分量作为金钱犯罪与暴力犯罪对比的排列';
run;

/* Select the text font used by the graphics output that follows. */
goptions ftext='宋体';

/* Scatter plot of component 2 against component 1, classified by STATE. */
proc gplot data=crimcomp;
   plot prin2*prin1=state;
   title2 '前两个主分量的散点图';
run;

/* Scatter plot of component 3 against component 1, classified by STATE. */
proc gplot data=crimcomp;
   plot prin3*prin1=state;
   title2 '第一、三主分量的散点图';
run;


/* Factor analysis of 14 job-performance rating items for police officers.
 * 103 officers (one data line each), 14 rating items per officer;
 * the goal is to look for the latent factors behind the rating items.
 */
/* VALIDVARNAME=ANY permits the name literals ('...'n) containing blanks
 * that are used as variable names below.  Note that this is a global
 * option and it is not reset afterwards.
 */
options validvarname=any;
data jobratings;
      /* Formatted input with a format list: the informat 1. is applied to
       * every variable in the parenthesised list, so each data line is read
       * as 14 consecutive single-digit numeric values, one per rating item.
       */
      input ('Communication Skills'n
             'Problem Solving'n
             'Learning Ability'n
             'Judgment Under Pressure'n
             'Observational Skills'n
             'Willingness to Confront Problems'n
             'Interest in People'n
             'Interpersonal Sensitivity'n
             'Desire for Self-Improvement'n
             'Appearance'n
             'Dependability'n
             'Physical Ability'n
             'Integrity'n
             'Overall Rating'n) (1.);
      datalines;
26838853879867
74758876857667
56757863775875
67869777988997
99997798878888
89897899888799
89999889899798
87794798468886
35652335143113
89888879576867
76557899446397
97889998898989
76766677598888
77667676779677
63839932588856
25738811284915
88879966797988
87979877959679
87989975878798
99889988898888
78876765687677
88889888899899
88889988878988
67646577384776
78778788799997
76888866768667
67678665746776
33424476664855
65656765785766
54566676565866
56655566656775
88889988868887
89899999898799
98889999899899
57554776468878
53687777797887
68666716475767
78778889798997
67364767565846
77678865886767
68698955669998
55546866663886
68888999998989
97787888798999
76677899799997
44754687877787
77876678798888
76668778799797
57653634361543
76777745653656
76766665656676
88888888878789
88977888869778
58894888747886
58674565473676
76777767777777
77788878789798
98989987999868
66729911474713
98889976999988
88786856667748
77868887897889
99999986999999
46688587616886
66755778486776
87777788889797
65666656545976
73574488887687
74755556586596
76677778789797
87878746777667
86776955874877
77888767778678
65778787778997
58786887787987
65787766676778
86777875468777
67788877757777
77778967855867
67887876767777
24786585535866
46532343542533
35566766676784
11231214211211
76886588536887
57784788688589
56667766465666
66787778778898
77687998877997
76668888546676
66477987589998
86788976884597
77868765785477
99988888987888
65948933886457
99999877988898
96636736876587
98676887798968
87878877898979
88897888888788
99997899799799
99899899899899
76656399567486
;
run;
/* Factor analysis of the rating items with 'Overall Rating' excluded:
 * squared multiple correlations as prior communality estimates and a
 * varimax rotation.  SCORE adds the scoring coefficients to the
 * OUTSTAT= data set (JOBSTAT) so that PROC SCORE can use them.
 */
proc factor
    data    = jobratings(drop='Overall Rating'n)
    priors  = smc
    rotate  = varimax
    outstat = jobstat
    score;
run;

/* Apply the scoring coefficients in JOBSTAT to the same input columns;
 * the scored observations are written to JOBSCORE.
 */
proc score
    data  = jobratings(drop='Overall Rating'n)
    score = jobstat
    out   = jobscore;
run;





/* Example data set SOCECON: twelve observations of five numeric variables
 * (POP, SCHOOL, EMPLOY, SERVICES, HOUSE), read with list input.
 * The main title is set here for the analyses of this data set.
 */
title '五个经济指标的分析';
data socecon;
   input POP SCHOOL EMPLOY SERVICES HOUSE;
   datalines;
5700     12.8    2500    270     25000
1000     10.9    600     10      10000
3400     8.8     1000    10      9000
3800     13.6    1700    140     25000
4000     12.8    1600    140     25000
8200     8.3     2600    60      12000
1200     11.4    400     10      16000
9100     11.5    3300    60      14000
9900     12.5    3400    180     18000
9600     13.7    3600    390     25000
9600     9.6     3300    80      12000
9400     11.4    4000    100     13000
;
run;
/* 1. Factor analysis of SOCECON with squared multiple correlations as
 *    prior communality estimates, no rotation.
 */
proc factor data=socecon
            priors=smc;
   title2 '主因子分析';
run;

/* 2. Same analysis followed by a PROMAX rotation; REORDER rearranges
 *    the variables in the printed output.
 */
proc factor data=socecon
            priors=smc
            rotate=promax
            reorder;
   title2 '主因子分析及PROMAX斜交旋转';
run;

/* 3. Same analysis with a VARIMAX rotation; SCORE together with
 *    OUTSTAT=OUTF saves the scoring coefficients for PROC SCORE.
 */
proc factor data=socecon
            priors=smc
            rotate=varimax
            reorder
            score
            outstat=outf;
   title2 '主因子分析及VARIMAX正交旋转';
run;

/* 4. Compute the scores for each observation from the coefficients
 *    in OUTF; the result is written to OUTS.
 */
proc score data=socecon
           score=outf
           out=outs;
   title2 ' VARIMAX正交旋转后的主因子得分';
run;



/* Discriminant analysis of remote-sensing data for five crops.
 * CROP is read from columns 1-10, X1-X4 follow as list input, and
 * XVALUES re-reads columns 11-21 as a character string so the four
 * measurements can be used as an identifier in listings.
 * The data lines are column-sensitive and must not be re-aligned.
 */
title '五种作物遥感数据的判别分析';
data crops;
   input crop    $ 1-10
         x1-x4
         xvalues $ 11-21;
   datalines;
CORN      16 27 31 33
CORN      15 23 30 30
CORN      16 27 27 26
CORN      18 20 25 23
CORN      15 15 31 32
CORN      15 32 32 15
CORN      12 15 16 73
SOYBEANS  20 23 23 25
SOYBEANS  24 24 25 32
SOYBEANS  21 25 23 24
SOYBEANS  27 45 24 12
SOYBEANS  12 13 15 42
SOYBEANS  22 32 31 43
COTTON    31 32 33 34
COTTON    29 24 26 28
COTTON    34 32 28 45
COTTON    26 25 23 24
COTTON    53 48 75 26
COTTON    34 35 25 78
SUGARBEETS22 23 25 42
SUGARBEETS25 25 24 26
SUGARBEETS34 25 16 52
SUGARBEETS54 23 21 54
SUGARBEETS25 43 32 15
SUGARBEETS26 54  2 54
CLOVER    12 45 32 54
CLOVER    24 58 25 34
CLOVER    87 54 61 21
CLOVER    51 31 31 16
CLOVER    96 48 54 62
CLOVER    31 31 11 11
CLOVER    56 13 13 71
CLOVER    32 13 27 32
CLOVER    36 26 54 32
CLOVER    53 08 06 54
CLOVER    32 32 62 16
;
run;

/* Discriminant analysis of CROPS on X1-X4 with CROP as the class variable:
 * METHOD=NORMAL with POOL=YES (pooled covariance matrix), prior
 * probabilities proportional to the class frequencies.
 * LIST and CROSSVALIDATE request the per-observation and cross-validation
 * classification results; OUTSTAT=CROPSTAT saves the calibration
 * statistics for later use.
 */
proc discrim
      data=crops
      outstat=cropstat
      method=normal
      pool=yes
      list
      crossvalidate;
   title2 '使用线性判别函数';
   class crop;
   var x1-x4;
   id xvalues;
   priors proportional;
run;

/* Test data set: one observation per crop, read with the same layout as
 * the CROPS input (CROP in columns 1-10, X1-X4 by list input, XVALUES
 * from columns 11-21).  The data lines are column-sensitive.
 */
data test;
   input crop    $ 1-10
         x1-x4
         xvalues $ 11-21;
   datalines;
CORN      16 27 31 33
SOYBEANS  21 25 23 24
COTTON    29 24 26 28
SUGARBEETS54 23 21 54
CLOVER    32 32 62 16
;
run;

/* Classify the observations in TEST using the statistics saved in CROPSTAT.
 * TESTLIST lists the classification of each test observation and
 * TESTOUT=TOUT saves the classification results as a data set.
 */
proc discrim
      data=cropstat
      testdata=test
      testout=tout
      testlist;
   title2 '检验数据的判别';
   class crop;
   var x1-x4;
   testclass crop;
   testid xvalues;
run;

/* Print the saved classification results. */
proc print data=tout;
   title2 '检验数据的判别结果';
run;


/* Cluster analysis of the iris data.
 *
 * Step 1: hierarchical clustering with Ward's method on the four
 *         measurement variables.  PSEUDO and CCC request the pseudo F /
 *         pseudo t-squared statistics and the cubic clustering criterion.
 *         The tree is saved in OTREE and SPECIES is carried along via COPY.
 */
proc cluster data=samp.iris method=ward 
             outtree=otree pseudo ccc;
  var petallen petalwid sepallen sepalwid;
  copy species;
run;

/* Step 2a: draw the dendrogram only.  The WHERE statement keeps just the
 *          tree rows with _NCL_ <= 30 so the plot stays readable.
 *          It is deliberately confined to this plotting call: WHERE filters
 *          the OUTTREE rows before the tree is built, so an OUT= data set
 *          written by the same call would be derived from the trimmed tree
 *          instead of from the original observations.
 */
proc tree data=otree graphics horizontal;
  where _ncl_ <= 30;
run;

/* Step 2b: cut the complete (unfiltered) tree into 3 clusters and save the
 *          cluster membership of every observation, together with SPECIES,
 *          in OCLUST.  NOPRINT suppresses a second copy of the diagram.
 */
proc tree data=otree noprint
     nclusters=3 out=oclust;
  copy species;
run;

/* Step 3: cross-tabulate the true species against the assigned cluster
 *         (cell counts only, no percentages).
 */
proc freq data=oclust;
  tables species*cluster /
      nopct norow nocol;
run;